{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recurrent Neural Networks for Language Modeling \n",
    "\n",
    "Recurrent Neural Networks (RNNs) is a powerful family of neural networks that are widely used for sequence modeling tasks (e.g. stock price prediction, language modeling). RNNs ability to exploit temporal dependecies of entities in a sequence makes them powerful. In this exercise we will model a RNN and learn tips and tricks to improve the performance.\n",
    "\n",
    "In this exercise, we will do the following.\n",
    "1. Create word vectors for a dataset created from stories available at http://clarkesworldmagazine.com/\n",
    "2. Train a RNN model on the dataset and use it to output a new story"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\users\\thushan\\documents\\python_virtualenvs\\tensorflow_venv\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "# These are all the modules we'll be using later. Make sure you can import them\n",
    "# before proceeding further.\n",
    "%matplotlib inline\n",
    "from __future__ import print_function\n",
    "import collections\n",
    "import math\n",
    "import numpy as np\n",
    "import os\n",
    "import random\n",
    "import tensorflow as tf\n",
    "import zipfile\n",
    "from matplotlib import pylab\n",
    "from six.moves import range\n",
    "from six.moves.urllib.request import urlretrieve\n",
    "import tensorflow as tf\n",
    "from scipy.sparse import lil_matrix\n",
    "#import nltk\n",
    "#nltk.download() #tokenizers/punkt/PY3/english.pickle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Downloading Data\n",
    "\n",
    "Downloading stories if not present in disk. There should be 100 files ('stories/001.txt','stories/002.txt', ...)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading file:  stories\\001.txt\n",
      "File  001.txt  already exists.\n",
      "Downloading file:  stories\\002.txt\n",
      "File  002.txt  already exists.\n",
      "Downloading file:  stories\\003.txt\n",
      "File  003.txt  already exists.\n",
      "Downloading file:  stories\\004.txt\n",
      "File  004.txt  already exists.\n",
      "Downloading file:  stories\\005.txt\n",
      "File  005.txt  already exists.\n",
      "Downloading file:  stories\\006.txt\n",
      "File  006.txt  already exists.\n",
      "Downloading file:  stories\\007.txt\n",
      "File  007.txt  already exists.\n",
      "Downloading file:  stories\\008.txt\n",
      "File  008.txt  already exists.\n",
      "Downloading file:  stories\\009.txt\n",
      "File  009.txt  already exists.\n",
      "Downloading file:  stories\\010.txt\n",
      "File  010.txt  already exists.\n",
      "Downloading file:  stories\\011.txt\n",
      "File  011.txt  already exists.\n",
      "Downloading file:  stories\\012.txt\n",
      "File  012.txt  already exists.\n",
      "Downloading file:  stories\\013.txt\n",
      "File  013.txt  already exists.\n",
      "Downloading file:  stories\\014.txt\n",
      "File  014.txt  already exists.\n",
      "Downloading file:  stories\\015.txt\n",
      "File  015.txt  already exists.\n",
      "Downloading file:  stories\\016.txt\n",
      "File  016.txt  already exists.\n",
      "Downloading file:  stories\\017.txt\n",
      "File  017.txt  already exists.\n",
      "Downloading file:  stories\\018.txt\n",
      "File  018.txt  already exists.\n",
      "Downloading file:  stories\\019.txt\n",
      "File  019.txt  already exists.\n",
      "Downloading file:  stories\\020.txt\n",
      "File  020.txt  already exists.\n",
      "Downloading file:  stories\\021.txt\n",
      "File  021.txt  already exists.\n",
      "Downloading file:  stories\\022.txt\n",
      "File  022.txt  already exists.\n",
      "Downloading file:  stories\\023.txt\n",
      "File  023.txt  already exists.\n",
      "Downloading file:  stories\\024.txt\n",
      "File  024.txt  already exists.\n",
      "Downloading file:  stories\\025.txt\n",
      "File  025.txt  already exists.\n",
      "Downloading file:  stories\\026.txt\n",
      "File  026.txt  already exists.\n",
      "Downloading file:  stories\\027.txt\n",
      "File  027.txt  already exists.\n",
      "Downloading file:  stories\\028.txt\n",
      "File  028.txt  already exists.\n",
      "Downloading file:  stories\\029.txt\n",
      "File  029.txt  already exists.\n",
      "Downloading file:  stories\\030.txt\n",
      "File  030.txt  already exists.\n",
      "Downloading file:  stories\\031.txt\n",
      "File  031.txt  already exists.\n",
      "Downloading file:  stories\\032.txt\n",
      "File  032.txt  already exists.\n",
      "Downloading file:  stories\\033.txt\n",
      "File  033.txt  already exists.\n",
      "Downloading file:  stories\\034.txt\n",
      "File  034.txt  already exists.\n",
      "Downloading file:  stories\\035.txt\n",
      "File  035.txt  already exists.\n",
      "Downloading file:  stories\\036.txt\n",
      "File  036.txt  already exists.\n",
      "Downloading file:  stories\\037.txt\n",
      "File  037.txt  already exists.\n",
      "Downloading file:  stories\\038.txt\n",
      "File  038.txt  already exists.\n",
      "Downloading file:  stories\\039.txt\n",
      "File  039.txt  already exists.\n",
      "Downloading file:  stories\\040.txt\n",
      "File  040.txt  already exists.\n",
      "Downloading file:  stories\\041.txt\n",
      "File  041.txt  already exists.\n",
      "Downloading file:  stories\\042.txt\n",
      "File  042.txt  already exists.\n",
      "Downloading file:  stories\\043.txt\n",
      "File  043.txt  already exists.\n",
      "Downloading file:  stories\\044.txt\n",
      "File  044.txt  already exists.\n",
      "Downloading file:  stories\\045.txt\n",
      "File  045.txt  already exists.\n",
      "Downloading file:  stories\\046.txt\n",
      "File  046.txt  already exists.\n",
      "Downloading file:  stories\\047.txt\n",
      "File  047.txt  already exists.\n",
      "Downloading file:  stories\\048.txt\n",
      "File  048.txt  already exists.\n",
      "Downloading file:  stories\\049.txt\n",
      "File  049.txt  already exists.\n",
      "Downloading file:  stories\\050.txt\n",
      "File  050.txt  already exists.\n",
      "Downloading file:  stories\\051.txt\n",
      "File  051.txt  already exists.\n",
      "Downloading file:  stories\\052.txt\n",
      "File  052.txt  already exists.\n",
      "Downloading file:  stories\\053.txt\n",
      "File  053.txt  already exists.\n",
      "Downloading file:  stories\\054.txt\n",
      "File  054.txt  already exists.\n",
      "Downloading file:  stories\\055.txt\n",
      "File  055.txt  already exists.\n",
      "Downloading file:  stories\\056.txt\n",
      "File  056.txt  already exists.\n",
      "Downloading file:  stories\\057.txt\n",
      "File  057.txt  already exists.\n",
      "Downloading file:  stories\\058.txt\n",
      "File  058.txt  already exists.\n",
      "Downloading file:  stories\\059.txt\n",
      "File  059.txt  already exists.\n",
      "Downloading file:  stories\\060.txt\n",
      "File  060.txt  already exists.\n",
      "Downloading file:  stories\\061.txt\n",
      "File  061.txt  already exists.\n",
      "Downloading file:  stories\\062.txt\n",
      "File  062.txt  already exists.\n",
      "Downloading file:  stories\\063.txt\n",
      "File  063.txt  already exists.\n",
      "Downloading file:  stories\\064.txt\n",
      "File  064.txt  already exists.\n",
      "Downloading file:  stories\\065.txt\n",
      "File  065.txt  already exists.\n",
      "Downloading file:  stories\\066.txt\n",
      "File  066.txt  already exists.\n",
      "Downloading file:  stories\\067.txt\n",
      "File  067.txt  already exists.\n",
      "Downloading file:  stories\\068.txt\n",
      "File  068.txt  already exists.\n",
      "Downloading file:  stories\\069.txt\n",
      "File  069.txt  already exists.\n",
      "Downloading file:  stories\\070.txt\n",
      "File  070.txt  already exists.\n",
      "Downloading file:  stories\\071.txt\n",
      "File  071.txt  already exists.\n",
      "Downloading file:  stories\\072.txt\n",
      "File  072.txt  already exists.\n",
      "Downloading file:  stories\\073.txt\n",
      "File  073.txt  already exists.\n",
      "Downloading file:  stories\\074.txt\n",
      "File  074.txt  already exists.\n",
      "Downloading file:  stories\\075.txt\n",
      "File  075.txt  already exists.\n",
      "Downloading file:  stories\\076.txt\n",
      "File  076.txt  already exists.\n",
      "Downloading file:  stories\\077.txt\n",
      "File  077.txt  already exists.\n",
      "Downloading file:  stories\\078.txt\n",
      "File  078.txt  already exists.\n",
      "Downloading file:  stories\\079.txt\n",
      "File  079.txt  already exists.\n",
      "Downloading file:  stories\\080.txt\n",
      "File  080.txt  already exists.\n",
      "Downloading file:  stories\\081.txt\n",
      "File  081.txt  already exists.\n",
      "Downloading file:  stories\\082.txt\n",
      "File  082.txt  already exists.\n",
      "Downloading file:  stories\\083.txt\n",
      "File  083.txt  already exists.\n",
      "Downloading file:  stories\\084.txt\n",
      "File  084.txt  already exists.\n",
      "Downloading file:  stories\\085.txt\n",
      "File  085.txt  already exists.\n",
      "Downloading file:  stories\\086.txt\n",
      "File  086.txt  already exists.\n",
      "Downloading file:  stories\\087.txt\n",
      "File  087.txt  already exists.\n",
      "Downloading file:  stories\\088.txt\n",
      "File  088.txt  already exists.\n",
      "Downloading file:  stories\\089.txt\n",
      "File  089.txt  already exists.\n",
      "Downloading file:  stories\\090.txt\n",
      "File  090.txt  already exists.\n",
      "Downloading file:  stories\\091.txt\n",
      "File  091.txt  already exists.\n",
      "Downloading file:  stories\\092.txt\n",
      "File  092.txt  already exists.\n",
      "Downloading file:  stories\\093.txt\n",
      "File  093.txt  already exists.\n",
      "Downloading file:  stories\\094.txt\n",
      "File  094.txt  already exists.\n",
      "Downloading file:  stories\\095.txt\n",
      "File  095.txt  already exists.\n",
      "Downloading file:  stories\\096.txt\n",
      "File  096.txt  already exists.\n",
      "Downloading file:  stories\\097.txt\n",
      "File  097.txt  already exists.\n",
      "Downloading file:  stories\\098.txt\n",
      "File  098.txt  already exists.\n",
      "Downloading file:  stories\\099.txt\n",
      "File  099.txt  already exists.\n",
      "Downloading file:  stories\\100.txt\n",
      "File  100.txt  already exists.\n"
     ]
    }
   ],
   "source": [
    "url = 'https://www.cs.cmu.edu/~spok/grimmtmp/'\n",
    "\n",
    "# Create a directory if needed\n",
    "dir_name = 'stories'\n",
    "if not os.path.exists(dir_name):\n",
    "    os.mkdir(dir_name)\n",
    "    \n",
    "def maybe_download(filename):\n",
    "  \"\"\"Download a file if not present\"\"\"\n",
    "  print('Downloading file: ', dir_name+ os.sep+filename)\n",
    "    \n",
    "  if not os.path.exists(dir_name+os.sep+filename):\n",
    "    filename, _ = urlretrieve(url + filename, dir_name+os.sep+filename)\n",
    "  else:\n",
    "    print('File ',filename, ' already exists.')\n",
    "  \n",
    "  return filename\n",
    "\n",
    "num_files = 100\n",
    "filenames = [format(i, '03d')+'.txt' for i in range(1,101)]\n",
    "\n",
    "for fn in filenames:\n",
    "    maybe_download(fn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reading data\n",
    "Data will be stored in a list of lists where the each list represents a document and document is a list of words. We will then break the text into bigrams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Processing file stories\\001.txt\n",
      "Data size (Characters) (Document 0) 3667\n",
      "Sample string (Document 0) ['in', ' o', 'ld', 'en', ' t', 'im', 'es', ' w', 'he', 'n ', 'wi', 'sh', 'in', 'g ', 'st', 'il', 'l ', 'he', 'lp', 'ed', ' o', 'ne', ', ', 'th', 'er', 'e ', 'li', 've', 'd ', 'a ', 'ki', 'ng', '\\nw', 'ho', 'se', ' d', 'au', 'gh', 'te', 'rs', ' w', 'er', 'e ', 'al', 'l ', 'be', 'au', 'ti', 'fu', 'l,']\n",
      "\n",
      "Processing file stories\\002.txt\n",
      "Data size (Characters) (Document 1) 4928\n",
      "Sample string (Document 1) ['ha', 'rd', ' b', 'y ', 'a ', 'gr', 'ea', 't ', 'fo', 're', 'st', ' d', 'we', 'lt', ' a', ' w', 'oo', 'd-', 'cu', 'tt', 'er', ' w', 'it', 'h ', 'hi', 's ', 'wi', 'fe', ', ', 'wh', 'o ', 'ha', 'd ', 'an', '\\no', 'nl', 'y ', 'ch', 'il', 'd,', ' a', ' l', 'it', 'tl', 'e ', 'gi', 'rl', ' t', 'hr', 'ee']\n",
      "\n",
      "Processing file stories\\003.txt\n",
      "Data size (Characters) (Document 2) 9745\n",
      "Sample string (Document 2) ['a ', 'ce', 'rt', 'ai', 'n ', 'fa', 'th', 'er', ' h', 'ad', ' t', 'wo', ' s', 'on', 's,', ' t', 'he', ' e', 'ld', 'er', ' o', 'f ', 'wh', 'om', ' w', 'as', ' s', 'ma', 'rt', ' a', 'nd', '\\ns', 'en', 'si', 'bl', 'e,', ' a', 'nd', ' c', 'ou', 'ld', ' d', 'o ', 'ev', 'er', 'yt', 'hi', 'ng', ', ', 'bu']\n",
      "\n",
      "Processing file stories\\004.txt\n",
      "Data size (Characters) (Document 3) 2852\n",
      "Sample string (Document 3) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', 'n ', 'ol', 'd ', 'go', 'at', ' w', 'ho', ' h', 'ad', ' s', 'ev', 'en', ' l', 'it', 'tl', 'e ', 'ki', 'ds', ', ', 'an', 'd\\n', 'lo', 've', 'd ', 'th', 'em', ' w', 'it', 'h ', 'al', 'l ', 'th', 'e ', 'lo', 've', ' o']\n",
      "\n",
      "Processing file stories\\005.txt\n",
      "Data size (Characters) (Document 4) 8189\n",
      "Sample string (Document 4) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', 'n ', 'ol', 'd ', 'ki', 'ng', ' w', 'ho', ' w', 'as', ' i', 'll', ' a', 'nd', ' t', 'ho', 'ug', 'ht', ' t', 'o\\n', 'hi', 'ms', 'el', 'f ', \"'i\", ' a', 'm ', 'ly', 'in', 'g ', 'on', ' w', 'ha', 't ', 'mu', 'st', ' b']\n",
      "\n",
      "Processing file stories\\006.txt\n",
      "Data size (Characters) (Document 5) 4369\n",
      "Sample string (Document 5) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'ea', 'sa', 'nt', ' w', 'ho', ' h', 'ad', ' d', 'ri', 've', 'n ', 'hi', 's ', 'co', 'w ', 'to', ' t', 'he', ' f', 'ai', 'r,', ' a', 'nd', ' s', 'ol', 'd\\n', 'he', 'r ', 'fo', 'r ', 'se', 've', 'n ', 'ta', 'le', 'rs', '. ', ' o', 'n ', 'th', 'e ']\n",
      "\n",
      "Processing file stories\\007.txt\n",
      "Data size (Characters) (Document 6) 5216\n",
      "Sample string (Document 6) ['th', 'er', 'e ', 'we', 're', ' o', 'nc', 'e ', 'up', 'on', ' a', ' t', 'im', 'e ', 'a ', 'ki', 'ng', ' a', 'nd', ' a', ' q', 'ue', 'en', ' w', 'ho', ' l', 'iv', 'ed', '\\nh', 'ap', 'pi', 'ly', ' t', 'og', 'et', 'he', 'r ', 'an', 'd ', 'ha', 'd ', 'tw', 'el', 've', ' c', 'hi', 'ld', 're', 'n,', ' b']\n",
      "\n",
      "Processing file stories\\008.txt\n",
      "Data size (Characters) (Document 7) 6097\n",
      "Sample string (Document 7) ['li', 'tt', 'le', ' b', 'ro', 'th', 'er', ' t', 'oo', 'k ', 'hi', 's ', 'li', 'tt', 'le', ' s', 'is', 'te', 'r ', 'by', ' t', 'he', ' h', 'an', 'd ', 'an', 'd ', 'sa', 'id', ', ', 'si', 'nc', 'e\\n', 'ou', 'r ', 'mo', 'th', 'er', ' d', 'ie', 'd ', 'we', ' h', 'av', 'e ', 'ha', 'd ', 'no', ' h', 'ap']\n",
      "\n",
      "Processing file stories\\009.txt\n",
      "Data size (Characters) (Document 8) 3699\n",
      "Sample string (Document 8) ['th', 'er', 'e ', 'we', 're', ' o', 'nc', 'e ', 'a ', 'ma', 'n ', 'an', 'd ', 'a ', 'wo', 'ma', 'n ', 'wh', 'o ', 'ha', 'd ', 'lo', 'ng', ' i', 'n ', 'va', 'in', '\\nw', 'is', 'he', 'd ', 'fo', 'r ', 'a ', 'ch', 'il', 'd.', '  ', 'at', ' l', 'en', 'gt', 'h ', 'th', 'e ', 'wo', 'ma', 'n ', 'ho', 'pe']\n",
      "\n",
      "Processing file stories\\010.txt\n",
      "Data size (Characters) (Document 9) 5268\n",
      "Sample string (Document 9) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'an', ' w', 'ho', 'se', ' w', 'if', 'e ', 'di', 'ed', ', ', 'an', 'd ', 'a ', 'wo', 'ma', 'n ', 'wh', 'os', 'e ', 'hu', 'sb', 'an', 'd\\n', 'di', 'ed', ', ', 'an', 'd ', 'th', 'e ', 'ma', 'n ', 'ha', 'd ', 'a ', 'da', 'ug', 'ht', 'er', ', ', 'an']\n",
      "\n",
      "Processing file stories\\011.txt\n",
      "Data size (Characters) (Document 10) 2377\n",
      "Sample string (Document 10) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' g', 'ir', 'l ', 'wh', 'o ', 'wa', 's ', 'id', 'le', ' a', 'nd', ' w', 'ou', 'ld', ' n', 'ot', ' s', 'pi', 'n,', ' a', 'nd', '\\nl', 'et', ' h', 'er', ' m', 'ot', 'he', 'r ', 'sa', 'y ', 'wh', 'at', ' s', 'he', ' w', 'ou', 'ld', ', ', 'sh', 'e ', 'co']\n",
      "\n",
      "Processing file stories\\012.txt\n",
      "Data size (Characters) (Document 11) 7695\n",
      "Sample string (Document 11) ['ha', 'rd', ' b', 'y ', 'a ', 'gr', 'ea', 't ', 'fo', 're', 'st', ' d', 'we', 'lt', ' a', ' p', 'oo', 'r ', 'wo', 'od', '-c', 'ut', 'te', 'r ', 'wi', 'th', ' h', 'is', ' w', 'if', 'e\\n', 'an', 'd ', 'hi', 's ', 'tw', 'o ', 'ch', 'il', 'dr', 'en', '. ', ' t', 'he', ' b', 'oy', ' w', 'as', ' c', 'al']\n",
      "\n",
      "Processing file stories\\013.txt\n",
      "Data size (Characters) (Document 12) 3665\n",
      "Sample string (Document 12) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' o', 'n ', 'a ', 'ti', 'me', ' a', ' p', 'oo', 'r ', 'ma', 'n,', ' w', 'ho', ' c', 'ou', 'ld', ' n', 'o ', 'lo', 'ng', 'er', '\\ns', 'up', 'po', 'rt', ' h', 'is', ' o', 'nl', 'y ', 'so', 'n.', '  ', 'th', 'en', ' s', 'ai', 'd ', 'th', 'e ', 'so', 'n,', ' d']\n",
      "\n",
      "Processing file stories\\014.txt\n",
      "Data size (Characters) (Document 13) 4178\n",
      "Sample string (Document 13) ['a ', 'lo', 'ng', ' t', 'im', 'e ', 'ag', 'o ', 'th', 'er', 'e ', 'li', 've', 'd ', 'a ', 'ki', 'ng', ' w', 'ho', ' w', 'as', ' f', 'am', 'ed', ' f', 'or', ' h', 'is', ' w', 'is', 'do', 'm\\n', 'th', 'ro', 'ug', 'h ', 'al', 'l ', 'th', 'e ', 'la', 'nd', '. ', ' n', 'ot', 'hi', 'ng', ' w', 'as', ' h']\n",
      "\n",
      "Processing file stories\\015.txt\n",
      "Data size (Characters) (Document 14) 8674\n",
      "Sample string (Document 14) ['on', 'e ', 'su', 'mm', 'er', \"'s\", ' m', 'or', 'ni', 'ng', ' a', ' l', 'it', 'tl', 'e ', 'ta', 'il', 'or', ' w', 'as', ' s', 'it', 'ti', 'ng', ' o', 'n ', 'hi', 's ', 'ta', 'bl', 'e\\n', 'by', ' t', 'he', ' w', 'in', 'do', 'w,', ' h', 'e ', 'wa', 's ', 'in', ' g', 'oo', 'd ', 'sp', 'ir', 'it', 's,']\n",
      "\n",
      "Processing file stories\\016.txt\n",
      "Data size (Characters) (Document 15) 7018\n",
      "Sample string (Document 15) ['\\tc', 'in', 'de', 're', 'll', 'a\\n', 'th', 'e ', 'wi', 'fe', ' o', 'f ', 'a ', 'ri', 'ch', ' m', 'an', ' f', 'el', 'l ', 'si', 'ck', ', ', 'an', 'd ', 'as', ' s', 'he', ' f', 'el', 't ', 'th', 'at', ' h', 'er', ' e', 'nd', '\\nw', 'as', ' d', 'ra', 'wi', 'ng', ' n', 'ea', 'r,', ' s', 'he', ' c', 'al']\n",
      "\n",
      "Processing file stories\\017.txt\n",
      "Data size (Characters) (Document 16) 3039\n",
      "Sample string (Document 16) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' k', 'in', \"g'\", 's ', 'so', 'n ', 'wh', 'o ', 'wa', 's ', 'se', 'iz', 'ed', ' w', 'it', 'h ', 'a ', 'de', 'si', 're', ' t', 'o ', 'tr', 'av', 'el', '\\na', 'bo', 'ut', ' t', 'he', ' w', 'or', 'ld', ', ', 'an', 'd ', 'to', 'ok', ' n', 'o ', 'on', 'e ']\n",
      "\n",
      "Processing file stories\\018.txt\n",
      "Data size (Characters) (Document 17) 3020\n",
      "Sample string (Document 17) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' w', 'id', 'ow', ' w', 'ho', ' h', 'ad', ' t', 'wo', ' d', 'au', 'gh', 'te', 'rs', ' -', ' o', 'ne', ' o', 'f\\n', 'wh', 'om', ' w', 'as', ' p', 're', 'tt', 'y ', 'an', 'd ', 'in', 'du', 'st', 'ri', 'ou', 's,', ' w', 'hi', 'ls', 't ', 'th', 'e ', 'ot']\n",
      "\n",
      "Processing file stories\\019.txt\n",
      "Data size (Characters) (Document 18) 2465\n",
      "Sample string (Document 18) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'an', ' w', 'ho', ' h', 'ad', ' s', 'ev', 'en', ' s', 'on', 's,', ' a', 'nd', ' s', 'ti', 'll', ' h', 'e ', 'ha', 'd\\n', 'no', ' d', 'au', 'gh', 'te', 'r,', ' h', 'ow', 'ev', 'er', ' m', 'uc', 'h ', 'he', ' w', 'is', 'he', 'd ', 'fo', 'r ', 'on']\n",
      "\n",
      "Processing file stories\\020.txt\n",
      "Data size (Characters) (Document 19) 3703\n",
      "Sample string (Document 19) ['\\tl', 'it', 'tl', 'e ', 're', 'd-', 'ca', 'p\\n', '\\no', 'nc', 'e ', 'up', 'on', ' a', ' t', 'im', 'e ', 'th', 'er', 'e ', 'wa', 's ', 'a ', 'de', 'ar', ' l', 'it', 'tl', 'e ', 'gi', 'rl', ' w', 'ho', ' w', 'as', ' l', 'ov', 'ed', '\\nb', 'y ', 'ev', 'er', 'y ', 'on', 'e ', 'wh', 'o ', 'lo', 'ok', 'ed']\n",
      "\n",
      "Processing file stories\\021.txt\n",
      "Data size (Characters) (Document 20) 1924\n",
      "Sample string (Document 20) ['in', ' a', ' c', 'er', 'ta', 'in', ' c', 'ou', 'nt', 'ry', ' t', 'he', 're', ' w', 'as', ' o', 'nc', 'e ', 'gr', 'ea', 't ', 'la', 'me', 'nt', 'at', 'io', 'n ', 'ov', 'er', ' a', '\\nw', 'il', 'd ', 'bo', 'ar', ' t', 'ha', 't ', 'la', 'id', ' w', 'as', 'te', ' t', 'he', ' f', 'ar', 'me', \"r'\", 's ']\n",
      "\n",
      "Processing file stories\\022.txt\n",
      "Data size (Characters) (Document 21) 6561\n",
      "Sample string (Document 21) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'wo', 'ma', 'n ', 'wh', 'o ', 'ga', 've', ' b', 'ir', 'th', ' t', 'o ', 'a ', 'li', 'tt', 'le', ' s', 'on', ',\\n', 'an', 'd ', 'as', ' h', 'e ', 'ca', 'me', ' i', 'nt', 'o ', 'th', 'e ', 'wo', 'rl', 'd ', 'wi', 'th', ' a', ' c', 'au']\n",
      "\n",
      "Processing file stories\\023.txt\n",
      "Data size (Characters) (Document 22) 5956\n",
      "Sample string (Document 22) ['a ', 'ce', 'rt', 'ai', 'n ', 'mi', 'll', 'er', ' h', 'ad', ' l', 'it', 'tl', 'e ', 'by', ' l', 'it', 'tl', 'e ', 'fa', 'll', 'en', ' i', 'nt', 'o ', 'po', 've', 'rt', 'y,', ' a', 'nd', '\\nh', 'ad', ' n', 'ot', 'hi', 'ng', ' l', 'ef', 't ', 'bu', 't ', 'hi', 's ', 'mi', 'll', ' a', 'nd', ' a', ' l']\n",
      "\n",
      "Processing file stories\\024.txt\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data size (Characters) (Document 23) 2529\n",
      "Sample string (Document 23) ['th', 'e ', 'mo', 'th', 'er', ' o', 'f ', 'ha', 'ns', ' s', 'ai', 'd,', ' w', 'hi', 'th', 'er', ' a', 'wa', 'y,', ' h', 'an', 's.', '  ', 'ha', 'ns', ' a', 'ns', 'we', 're', 'd,', ' t', 'o\\n', 'gr', 'et', 'el', '. ', ' b', 'eh', 'av', 'e ', 'we', 'll', ', ', 'ha', 'ns', '. ', ' o', 'h,', ' i', \"'l\"]\n",
      "\n",
      "Processing file stories\\025.txt\n",
      "Data size (Characters) (Document 24) 2416\n",
      "Sample string (Document 24) ['an', ' a', 'ge', 'd ', 'co', 'un', 't ', 'on', 'ce', ' l', 'iv', 'ed', ' i', 'n ', 'sw', 'it', 'ze', 'rl', 'an', 'd,', ' w', 'ho', ' h', 'ad', ' a', 'n ', 'on', 'ly', ' s', 'on', ',\\n', 'bu', 't ', 'he', ' w', 'as', ' s', 'tu', 'pi', 'd,', ' a', 'nd', ' c', 'ou', 'ld', ' l', 'ea', 'rn', ' n', 'ot']\n",
      "\n",
      "Processing file stories\\026.txt\n",
      "Data size (Characters) (Document 25) 3369\n",
      "Sample string (Document 25) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'an', ' w', 'ho', ' h', 'ad', ' a', ' d', 'au', 'gh', 'te', 'r ', 'wh', 'o ', 'wa', 's ', 'ca', 'll', 'ed', ' c', 'le', 've', 'r\\n', 'el', 'si', 'e.', '  ', 'an', 'd ', 'wh', 'en', ' s', 'he', ' h', 'ad', ' g', 'ro', 'wn', ' u', 'p ', 'he', 'r ']\n",
      "\n",
      "Processing file stories\\027.txt\n",
      "Data size (Characters) (Document 26) 10013\n",
      "Sample string (Document 26) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' t', 'ai', 'lo', 'r ', 'wh', 'o ', 'ha', 'd ', 'th', 're', 'e ', 'so', 'ns', ', ', 'an', 'd\\n', 'on', 'ly', ' o', 'ne', ' g', 'oa', 't.', '  ', 'bu', 't ', 'as', ' t', 'he', ' g', 'oa', 't ', 'su', 'pp', 'or', 'te']\n",
      "\n",
      "Processing file stories\\028.txt\n",
      "Data size (Characters) (Document 27) 5788\n",
      "Sample string (Document 27) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'pe', 'as', 'an', 't ', 'wh', 'o ', 'sa', 't ', 'in', ' t', 'he', ' e', 've', 'ni', 'ng', ' b', 'y ', 'th', 'e\\n', 'he', 'ar', 'th', ' a', 'nd', ' p', 'ok', 'ed', ' t', 'he', ' f', 'ir', 'e,', ' a', 'nd', ' h', 'is', ' w', 'if', 'e ']\n",
      "\n",
      "Processing file stories\\029.txt\n",
      "Data size (Characters) (Document 28) 1335\n",
      "Sample string (Document 28) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'se', 'rv', 'an', 't-', 'gi', 'rl', ' w', 'ho', ' w', 'as', ' i', 'nd', 'us', 'tr', 'io', 'us', ' a', 'nd', ' c', 'le', 'an', 'ly', '\\na', 'nd', ' s', 'we', 'pt', ' t', 'he', ' h', 'ou', 'se', ' e', 've', 'ry', ' d', 'ay', ', ', 'an']\n",
      "\n",
      "Processing file stories\\030.txt\n",
      "Data size (Characters) (Document 29) 3591\n",
      "Sample string (Document 29) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' m', 'il', 'le', 'r,', ' w', 'ho', ' h', 'ad', ' a', ' b', 'ea', 'ut', 'if', 'ul', '\\nd', 'au', 'gh', 'te', 'r,', ' a', 'nd', ' a', 's ', 'sh', 'e ', 'wa', 's ', 'gr', 'ow', 'n ', 'up', ', ', 'he', ' w', 'is', 'he']\n",
      "\n",
      "Processing file stories\\031.txt\n",
      "Data size (Characters) (Document 30) 1624\n",
      "Sample string (Document 30) ['a ', 'po', 'or', ' m', 'an', ' h', 'ad', ' s', 'o ', 'ma', 'ny', ' c', 'hi', 'ld', 're', 'n ', 'th', 'at', ' h', 'e ', 'ha', 'd ', 'al', 're', 'ad', 'y ', 'as', 'ke', 'd\\n', 'ev', 'er', 'yo', 'ne', ' i', 'n ', 'th', 'e ', 'wo', 'rl', 'd ', 'to', ' b', 'e ', 'go', 'df', 'at', 'he', 'r,', ' a', 'nd']\n",
      "\n",
      "Processing file stories\\032.txt\n",
      "Data size (Characters) (Document 31) 758\n",
      "Sample string (Document 31) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' l', 'it', 'tl', 'e ', 'gi', 'rl', ' w', 'ho', ' w', 'as', ' o', 'bs', 'ti', 'na', 'te', ' a', 'nd', ' i', 'nq', 'ui', 'si', 'ti', 've', ',\\n', 'an', 'd ', 'wh', 'en', ' h', 'er', ' p', 'ar', 'en', 'ts', ' t', 'ol', 'd ', 'he', 'r ', 'to', ' d', 'o ']\n",
      "\n",
      "Processing file stories\\033.txt\n",
      "Data size (Characters) (Document 32) 3121\n",
      "Sample string (Document 32) ['a ', 'po', 'or', ' m', 'an', ' h', 'ad', ' t', 'we', 'lv', 'e ', 'ch', 'il', 'dr', 'en', ' a', 'nd', ' w', 'as', ' f', 'or', 'ce', 'd ', 'to', ' w', 'or', 'k ', 'ni', 'gh', 't ', 'an', 'd\\n', 'da', 'y ', 'to', ' g', 'iv', 'e ', 'th', 'em', ' e', 've', 'n ', 'br', 'ea', 'd.', '  ', 'wh', 'en', ' t']\n",
      "\n",
      "Processing file stories\\034.txt\n",
      "Data size (Characters) (Document 33) 4192\n",
      "Sample string (Document 33) ['a ', 'ce', 'rt', 'ai', 'n ', 'ta', 'il', 'or', ' h', 'ad', ' a', ' s', 'on', ', ', 'wh', 'o ', 'ha', 'pp', 'en', 'ed', ' t', 'o ', 'be', ' s', 'ma', 'll', ', ', 'an', 'd\\n', 'no', ' b', 'ig', 'ge', 'r ', 'th', 'an', ' a', ' t', 'hu', 'mb', ', ', 'an', 'd ', 'on', ' t', 'hi', 's ', 'ac', 'co', 'un']\n",
      "\n",
      "Processing file stories\\035.txt\n",
      "Data size (Characters) (Document 34) 3650\n",
      "Sample string (Document 34) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' w', 'iz', 'ar', 'd ', 'wh', 'o ', 'us', 'ed', ' t', 'o ', 'ta', 'ke', ' t', 'he', ' f', 'or', 'm ', 'of', ' a', ' p', 'oo', 'r\\n', 'ma', 'n,', ' a', 'nd', ' w', 'en', 't ', 'to', ' h', 'ou', 'se', 's ', 'an', 'd ', 'be', 'gg', 'ed', ', ', 'an', 'd ']\n",
      "\n",
      "Processing file stories\\036.txt\n",
      "Data size (Characters) (Document 35) 8219\n",
      "Sample string (Document 35) ['it', ' i', 's ', 'no', 'w ', 'lo', 'ng', ' a', 'go', ', ', 'qu', 'it', 'e ', 'tw', 'o ', 'th', 'ou', 'sa', 'nd', ' y', 'ea', 'rs', ', ', 'si', 'nc', 'e ', 'th', 'er', 'e ', 'wa', 's\\n', 'a ', 'ri', 'ch', ' m', 'an', ' w', 'ho', ' h', 'ad', ' a', ' b', 'ea', 'ut', 'if', 'ul', ' a', 'nd', ' p', 'io']\n",
      "\n",
      "Processing file stories\\037.txt\n",
      "Data size (Characters) (Document 36) 2151\n",
      "Sample string (Document 36) ['a ', 'fa', 'rm', 'er', ' o', 'nc', 'e ', 'ha', 'd ', 'a ', 'fa', 'it', 'hf', 'ul', ' d', 'og', ' c', 'al', 'le', 'd ', 'su', 'lt', 'an', ', ', 'wh', 'o ', 'ha', 'd ', 'gr', 'ow', 'n\\n', 'ol', 'd,', ' a', 'nd', ' l', 'os', 't ', 'al', 'l ', 'hi', 's ', 'te', 'et', 'h,', ' s', 'o ', 'th', 'at', ' h']\n",
      "\n",
      "Processing file stories\\038.txt\n",
      "Data size (Characters) (Document 37) 5129\n",
      "Sample string (Document 37) ['on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ', ', 'a ', 'ce', 'rt', 'ai', 'n ', 'ki', 'ng', ' w', 'as', ' h', 'un', 'ti', 'ng', ' i', 'n ', 'a ', 'gr', 'ea', 't ', 'fo', 're', 'st', ',\\n', 'an', 'd ', 'he', ' c', 'ha', 'se', 'd ', 'a ', 'wi', 'ld', ' b', 'ea', 'st', ' s', 'o ', 'ea', 'ge', 'rl']\n",
      "\n",
      "Processing file stories\\039.txt\n",
      "Data size (Characters) (Document 38) 3472\n",
      "Sample string (Document 38) ['\\tb', 'ri', 'ar', '-r', 'os', 'e\\n', '\\na', ' l', 'on', 'g ', 'ti', 'me', ' a', 'go', ' t', 'he', 're', ' w', 'er', 'e ', 'a ', 'ki', 'ng', ' a', 'nd', ' q', 'ue', 'en', ' w', 'ho', ' s', 'ai', 'd ', 'ev', 'er', 'y\\n', 'da', 'y,', ' a', 'h,', ' i', 'f ', 'on', 'ly', ' w', 'e ', 'ha', 'd ', 'a ', 'ch']\n",
      "\n",
      "Processing file stories\\040.txt\n",
      "Data size (Characters) (Document 39) 2490\n",
      "Sample string (Document 39) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' f', 'or', 'es', 'te', 'r ', 'wh', 'o ', 'we', 'nt', ' i', 'nt', 'o ', 'th', 'e ', 'fo', 're', 'st', ' t', 'o ', 'hu', 'nt', ',\\n', 'an', 'd ', 'as', ' h', 'e ', 'en', 'te', 're', 'd ', 'it', ' h', 'e ', 'he', 'ar', 'd ', 'a ', 'so', 'un', 'd ', 'of']\n",
      "\n",
      "Processing file stories\\041.txt\n",
      "Data size (Characters) (Document 40) 4273\n",
      "Sample string (Document 40) ['a ', 'ki', 'ng', ' h', 'ad', ' a', ' d', 'au', 'gh', 'te', 'r ', 'wh', 'o ', 'wa', 's ', 'be', 'au', 'ti', 'fu', 'l ', 'be', 'yo', 'nd', ' a', 'll', ' m', 'ea', 'su', 're', ',\\n', 'bu', 't ', 'so', ' p', 'ro', 'ud', ' a', 'nd', ' h', 'au', 'gh', 'ty', ' w', 'it', 'ha', 'l ', 'th', 'at', ' n', 'o ']\n",
      "\n",
      "Processing file stories\\042.txt\n",
      "Data size (Characters) (Document 41) 8327\n",
      "Sample string (Document 41) ['\\ts', 'no', 'w ', 'wh', 'it', 'e ', 'an', 'd ', 'th', 'e ', 'se', 've', 'n ', 'dw', 'ar', 'fs', '\\n\\n', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' i', 'n ', 'th', 'e ', 'mi', 'dd', 'le', ' o', 'f ', 'wi', 'nt', 'er', ', ', 'wh', 'en', ' t', 'he', ' f', 'la', 'ke', 's ', 'of', '\\ns', 'no', 'w ']\n",
      "\n",
      "Processing file stories\\043.txt\n",
      "Data size (Characters) (Document 42) 6128\n",
      "Sample string (Document 42) ['th', 'er', 'e ', 'we', 're', ' o', 'nc', 'e ', 'th', 're', 'e ', 'br', 'ot', 'he', 'rs', ' w', 'ho', ' h', 'ad', ' f', 'al', 'le', 'n ', 'de', 'ep', 'er', ' a', 'nd', ' d', 'ee', 'pe', 'r ', 'in', 'to', '\\np', 'ov', 'er', 'ty', ', ', 'an', 'd ', 'at', ' l', 'as', 't ', 'th', 'ei', 'r ', 'ne', 'ed']\n",
      "\n",
      "Processing file stories\\044.txt\n",
      "Data size (Characters) (Document 43) 2819\n",
      "Sample string (Document 43) ['\\tr', 'um', 'pe', 'ls', 'ti', 'lt', 'sk', 'in', '\\n\\n', 'on', 'ce', ' t', 'he', 're', ' w', 'as', ' a', ' m', 'il', 'le', 'r ', 'wh', 'o ', 'wa', 's ', 'po', 'or', ', ', 'bu', 't ', 'wh', 'o ', 'ha', 'd ', 'a ', 'be', 'au', 'ti', 'fu', 'l\\n', 'da', 'ug', 'ht', 'er', '. ', ' n', 'ow', ' i', 't ', 'ha']\n",
      "\n",
      "Processing file stories\\045.txt\n",
      "Data size (Characters) (Document 44) 3822\n",
      "Sample string (Document 44) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' w', 'om', 'an', ' w', 'ho', ' w', 'as', ' a', ' r', 'ea', 'l ', 'wi', 'tc', 'h ', 'an', 'd ', 'ha', 'd ', 'tw', 'o\\n', 'da', 'ug', 'ht', 'er', 's,', ' o', 'ne', ' u', 'gl', 'y ', 'an', 'd ', 'wi', 'ck', 'ed', ', ']\n",
      "\n",
      "Processing file stories\\046.txt\n",
      "Data size (Characters) (Document 45) 7772\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sample string (Document 45) ['in', ' o', 'ld', 'en', ' t', 'im', 'es', ' t', 'he', 're', ' w', 'as', ' a', ' k', 'in', 'g,', ' w', 'ho', ' h', 'ad', ' b', 'eh', 'in', 'd ', 'hi', 's ', 'pa', 'la', 'ce', ' a', '\\nb', 'ea', 'ut', 'if', 'ul', ' p', 'le', 'as', 'ur', 'e-', 'ga', 'rd', 'en', ' i', 'n ', 'wh', 'ic', 'h ', 'th', 'er']\n",
      "\n",
      "Processing file stories\\047.txt\n",
      "Data size (Characters) (Document 46) 22158\n",
      "Sample string (Document 46) ['th', 'er', 'e ', 'we', 're', ' o', 'nc', 'e ', 'up', 'on', ' a', ' t', 'im', 'e ', 'tw', 'o ', 'br', 'ot', 'he', 'rs', ', ', 'on', 'e ', 'ri', 'ch', ' a', 'nd', ' t', 'he', ' o', 'th', 'er', '\\np', 'oo', 'r.', '  ', 'th', 'e ', 'ri', 'ch', ' o', 'ne', ' w', 'as', ' a', ' g', 'ol', 'ds', 'mi', 'th']\n",
      "\n",
      "Processing file stories\\048.txt\n",
      "Data size (Characters) (Document 47) 2169\n",
      "Sample string (Document 47) ['tw', 'o ', 'ki', 'ng', \"s'\", ' s', 'on', 's ', 'on', 'ce', ' w', 'en', 't ', 'ou', 't ', 'in', ' s', 'ea', 'rc', 'h ', 'of', ' a', 'dv', 'en', 'tu', 're', 's,', ' a', 'nd', ' f', 'el', 'l ', 'in', 'to', '\\na', ' w', 'il', 'd,', ' d', 'is', 'or', 'de', 'rl', 'y ', 'wa', 'y ', 'of', ' l', 'iv', 'in']\n",
      "\n",
      "Processing file stories\\049.txt\n",
      "Data size (Characters) (Document 48) 2822\n",
      "Sample string (Document 48) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' k', 'in', 'g ', 'wh', 'o ', 'ha', 'd ', 'th', 're', 'e ', 'so', 'ns', ', ', 'of', ' w', 'ho', 'm ', 'tw', 'o\\n', 'we', 're', ' c', 'le', 've', 'r ', 'an', 'd ', 'wi', 'se', ', ', 'bu', 't ', 'th', 'e ', 'th', 'ir']\n",
      "\n",
      "Processing file stories\\050.txt\n",
      "Data size (Characters) (Document 49) 4034\n",
      "Sample string (Document 49) ['th', 'er', 'e ', 'wa', 's ', 'a ', 'ma', 'n ', 'wh', 'o ', 'ha', 'd ', 'th', 're', 'e ', 'so', 'ns', ', ', 'th', 'e ', 'yo', 'un', 'ge', 'st', ' o', 'f ', 'wh', 'om', ' w', 'as', ' c', 'al', 'le', 'd\\n', 'du', 'mm', 'li', 'ng', ', ', 'an', 'd ', 'wa', 's ', 'de', 'sp', 'is', 'ed', ', ', 'mo', 'ck']\n",
      "\n",
      "Processing file stories\\051.txt\n",
      "Data size (Characters) (Document 50) 5608\n",
      "Sample string (Document 50) ['\\ta', 'll', 'er', 'le', 'ir', 'au', 'h\\n', '\\nt', 'he', 're', ' w', 'as', ' o', 'nc', 'e ', 'up', 'on', ' a', ' t', 'im', 'e ', 'a ', 'ki', 'ng', ' w', 'ho', ' h', 'ad', ' a', ' w', 'if', 'e ', 'wi', 'th', ' g', 'ol', 'de', 'n ', 'ha', 'ir', ',\\n', 'an', 'd ', 'sh', 'e ', 'wa', 's ', 'so', ' b', 'ea']\n",
      "\n",
      "Processing file stories\\052.txt\n",
      "Data size (Characters) (Document 51) 1287\n",
      "Sample string (Document 51) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' w', 'om', 'an', ' a', 'nd', ' h', 'er', ' d', 'au', 'gh', 'te', 'r ', 'wh', 'o ', 'li', 've', 'd ', 'in', ' a', '\\np', 're', 'tt', 'y ', 'ga', 'rd', 'en', ' w', 'it', 'h ', 'ca', 'bb', 'ag', 'es', '. ', ' a', 'nd', ' a', ' l', 'it', 'tl', 'e ', 'ha']\n",
      "\n",
      "Processing file stories\\053.txt\n",
      "Data size (Characters) (Document 52) 2841\n",
      "Sample string (Document 52) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' k', 'in', \"g'\", 's ', 'so', 'n ', 'wh', 'o ', 'ha', 'd ', 'a ', 'br', 'id', 'e ', 'wh', 'om', ' h', 'e ', 'lo', 've', 'd ', 've', 'ry', ' m', 'uc', 'h.', '\\na', 'nd', ' w', 'he', 'n ', 'he', ' w', 'as', ' s', 'it', 'ti', 'ng', ' b', 'es', 'id', 'e ']\n",
      "\n",
      "Processing file stories\\054.txt\n",
      "Data size (Characters) (Document 53) 1922\n",
      "Sample string (Document 53) ['ha', 'ns', ' w', 'is', 'he', 'd ', 'to', ' p', 'ut', ' h', 'is', ' s', 'on', ' t', 'o ', 'le', 'ar', 'n ', 'a ', 'tr', 'ad', 'e,', ' s', 'o ', 'he', ' w', 'en', 't ', 'in', 'to', ' t', 'he', '\\nc', 'hu', 'rc', 'h ', 'an', 'd ', 'pr', 'ay', 'ed', ' t', 'o ', 'ou', 'r ', 'lo', 'rd', ' g', 'od', ' t']\n",
      "\n",
      "Processing file stories\\055.txt\n",
      "Data size (Characters) (Document 54) 2573\n",
      "Sample string (Document 54) ['a ', 'fa', 'th', 'er', ' o', 'nc', 'e ', 'ca', 'll', 'ed', ' h', 'is', ' t', 'hr', 'ee', ' s', 'on', 's ', 'be', 'fo', 're', ' h', 'im', ', ', 'an', 'd ', 'he', ' g', 'av', 'e ', 'to', ' t', 'he', '\\nf', 'ir', 'st', ' a', ' c', 'oc', 'k,', ' t', 'o ', 'th', 'e ', 'se', 'co', 'nd', ' a', ' s', 'cy']\n",
      "\n",
      "Processing file stories\\056.txt\n",
      "Data size (Characters) (Document 55) 5285\n",
      "Sample string (Document 55) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'an', ' w', 'ho', ' u', 'nd', 'er', 'st', 'oo', 'd ', 'al', 'l ', 'ki', 'nd', 's ', 'of', ' a', 'rt', 's.', '  ', 'he', ' s', 'er', 've', 'd ', 'in', '\\nw', 'ar', ', ', 'an', 'd ', 'be', 'ha', 've', 'd ', 'we', 'll', ' a', 'nd', ' b', 'ra', 've']\n",
      "\n",
      "Processing file stories\\057.txt\n",
      "Data size (Characters) (Document 56) 971\n",
      "Sample string (Document 56) ['th', 'e ', 'sh', 'e-', 'wo', 'lf', ' b', 'ro', 'ug', 'ht', ' i', 'nt', 'o ', 'th', 'e ', 'wo', 'rl', 'd ', 'a ', 'yo', 'un', 'g ', 'on', 'e,', ' a', 'nd', ' i', 'nv', 'it', 'ed', ' t', 'he', ' f', 'ox', '\\nt', 'o ', 'be', ' g', 'od', 'fa', 'th', 'er', '. ', ' a', 'ft', 'er', ' a', 'll', ', ', 'he']\n",
      "\n",
      "Processing file stories\\058.txt\n",
      "Data size (Characters) (Document 57) 4538\n",
      "Sample string (Document 57) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' q', 'ue', 'en', ' t', 'o ', 'wh', 'om', ' g', 'od', ' h', 'ad', ' g', 'iv', 'en', ' n', 'o ', 'ch', 'il', 'dr', 'en', '.\\n', 'ev', 'er', 'y ', 'mo', 'rn', 'in', 'g ', 'sh', 'e ', 'we', 'nt', ' i', 'nt', 'o ', 'th']\n",
      "\n",
      "Processing file stories\\059.txt\n",
      "Data size (Characters) (Document 58) 636\n",
      "Sample string (Document 58) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' v', 'er', 'y ', 'ol', 'd ', 'ma', 'n,', ' w', 'ho', 'se', ' e', 'ye', 's ', 'ha', 'd ', 'be', 'co', 'me', ' d', 'im', ', ', 'hi', 's ', 'ea', 'rs', '\\nd', 'ul', 'l ', 'of', ' h', 'ea', 'ri', 'ng', ', ', 'hi', 's ', 'kn', 'ee', 's ', 'tr', 'em', 'bl']\n",
      "\n",
      "Processing file stories\\060.txt\n",
      "Data size (Characters) (Document 59) 786\n",
      "Sample string (Document 59) ['a ', 'li', 'tt', 'le', ' b', 'ro', 'th', 'er', ' a', 'nd', ' s', 'is', 'te', 'r ', 'we', 're', ' o', 'nc', 'e ', 'pl', 'ay', 'in', 'g ', 'by', ' a', ' w', 'el', 'l,', ' a', 'nd', ' w', 'hi', 'le', '\\nt', 'he', 'y ', 'we', 're', ' t', 'hu', 's ', 'pl', 'ay', 'in', 'g,', ' t', 'he', 'y ', 'bo', 'th']\n",
      "\n",
      "Processing file stories\\061.txt\n",
      "Data size (Characters) (Document 60) 10687\n",
      "Sample string (Document 60) ['th', 'er', 'e ', 'wa', 's ', 'on', 'e ', 'up', 'on', ' a', ' t', 'im', 'e ', 'a ', 'gr', 'ea', 't ', 'wa', 'r,', ' a', 'nd', ' w', 'he', 'n ', 'it', ' c', 'am', 'e ', 'to', ' a', 'n ', 'en', 'd,', '\\nm', 'an', 'y ', 'so', 'ld', 'ie', 'rs', ' w', 'er', 'e ', 'di', 'sc', 'ha', 'rg', 'ed', '. ', ' t']\n",
      "\n",
      "Processing file stories\\062.txt\n",
      "Data size (Characters) (Document 61) 5105\n",
      "Sample string (Document 61) ['ha', 'ns', ' h', 'ad', ' s', 'er', 've', 'd ', 'hi', 's ', 'ma', 'st', 'er', ' f', 'or', ' s', 'ev', 'en', ' y', 'ea', 'rs', ', ', 'so', ' h', 'e ', 'sa', 'id', ' t', 'o ', 'hi', 'm,', '\\nm', 'as', 'te', 'r,', ' m', 'y ', 'ti', 'me', ' i', 's ', 'up', ', ', 'no', 'w ', 'i ', 'sh', 'ou', 'ld', ' b']\n",
      "\n",
      "Processing file stories\\063.txt\n",
      "Data size (Characters) (Document 62) 1127\n",
      "Sample string (Document 62) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' y', 'ou', 'ng', ' p', 'ea', 'sa', 'nt', ' n', 'am', 'ed', ' h', 'an', 's,', ' w', 'ho', 'se', ' u', 'nc', 'le', '\\nw', 'an', 'te', 'd ', 'to', ' f', 'in', 'd ', 'hi', 'm ', 'a ', 'ri', 'ch', ' w', 'if', 'e.', '  ']\n",
      "\n",
      "Processing file stories\\064.txt\n",
      "Data size (Characters) (Document 63) 4981\n",
      "Sample string (Document 63) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'ma', 'n ', 'an', 'd ', 'a ', 'po', 'or', ' w', 'om', 'an', ' w', 'ho', ' h', 'ad', ' n', 'ot', 'hi', 'ng', ' b', 'ut', ' a', '\\nl', 'it', 'tl', 'e ', 'co', 'tt', 'ag', 'e,', ' a', 'nd', ' w', 'ho', ' e', 'ar', 'ne', 'd ', 'th', 'ei']\n",
      "\n",
      "Processing file stories\\065.txt\n",
      "Data size (Characters) (Document 64) 6006\n",
      "Sample string (Document 64) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' m', 'an', ' w', 'ho', ' w', 'as', ' a', 'bo', 'ut', ' t', 'o ', 'se', 't ', 'ou', 't ', 'on', ' a', ' l', 'on', 'g\\n', 'jo', 'ur', 'ne', 'y,', ' a', 'nd', ' o', 'n ', 'pa', 'rt', 'in', 'g ', 'he', ' a', 'sk', 'ed']\n",
      "\n",
      "Processing file stories\\066.txt\n",
      "Data size (Characters) (Document 65) 5900\n",
      "Sample string (Document 65) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', 'n ', 'ol', 'd ', 'qu', 'ee', 'n ', 'wh', 'os', 'e ', 'hu', 'sb', 'an', 'd ', 'ha', 'd ', 'be', 'en', ' d', 'ea', 'd\\n', 'fo', 'r ', 'ma', 'ny', ' y', 'ea', 'rs', ', ', 'an', 'd ', 'sh', 'e ', 'ha', 'd ', 'a ', 'be']\n",
      "\n",
      "Processing file stories\\067.txt\n",
      "Data size (Characters) (Document 66) 7837\n",
      "Sample string (Document 66) ['on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' c', 'ou', 'nt', 'ry', 'ma', 'n ', 'ha', 'd ', 'a ', 'so', 'n ', 'wh', 'o ', 'wa', 's ', 'as', ' b', 'ig', ' a', 's ', 'a ', 'th', 'um', 'b,', '\\na', 'nd', ' d', 'id', ' n', 'ot', ' b', 'ec', 'om', 'e ', 'an', 'y ', 'bi', 'gg', 'er', ', ', 'an']\n",
      "\n",
      "Processing file stories\\068.txt\n",
      "Data size (Characters) (Document 67) 4717\n",
      "Sample string (Document 67) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' r', 'ic', 'h ', 'ki', 'ng', ' w', 'ho', ' h', 'ad', ' t', 'hr', 'ee', ' d', 'au', 'gh', 'te', 'rs', ', ', 'wh', 'o\\n', 'da', 'il', 'y ', 'we', 'nt', ' t', 'o ', 'wa', 'lk', ' i', 'n ', 'th', 'e ', 'pa', 'la', 'ce']\n",
      "\n",
      "Processing file stories\\069.txt\n",
      "Data size (Characters) (Document 68) 6233\n",
      "Sample string (Document 68) ['th', 'er', 'e ', 'wa', 's ', 'a ', 'ce', 'rt', 'ai', 'n ', 'me', 'rc', 'ha', 'nt', ' w', 'ho', ' h', 'ad', ' t', 'wo', ' c', 'hi', 'ld', 're', 'n,', ' a', ' b', 'oy', ' a', 'nd', ' a', ' g', 'ir', 'l,', '\\nt', 'he', 'y ', 'we', 're', ' b', 'ot', 'h ', 'yo', 'un', 'g,', ' a', 'nd', ' c', 'ou', 'ld']\n",
      "\n",
      "Processing file stories\\070.txt\n",
      "Data size (Characters) (Document 69) 5664\n",
      "Sample string (Document 69) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' q', 'ue', 'en', ' w', 'ho', ' h', 'ad', ' a', ' l', 'it', 'tl', 'e ', 'da', 'ug', 'ht', 'er', ' w', 'ho', '\\nw', 'as', ' s', 'ti', 'll', ' s', 'o ', 'yo', 'un', 'g ', 'th', 'at', ' s', 'he', ' h', 'ad', ' t', 'o ']\n",
      "\n",
      "Processing file stories\\071.txt\n",
      "Data size (Characters) (Document 70) 3569\n",
      "Sample string (Document 70) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'pe', 'as', 'an', 't ', 'wh', 'o ', 'ha', 'd ', 'no', ' l', 'an', 'd,', ' b', 'ut', ' o', 'nl', 'y ', 'a ', 'sm', 'al', 'l\\n', 'ho', 'us', 'e,', ' a', 'nd', ' o', 'ne', ' d', 'au', 'gh', 'te', 'r.', '  ', 'th', 'en', ' s', 'ai', 'd ']\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing file stories\\072.txt\n",
      "Data size (Characters) (Document 71) 3793\n",
      "Sample string (Document 71) ['ab', 'ou', 't ', 'a ', 'th', 'ou', 'sa', 'nd', ' o', 'r ', 'mo', 're', ' y', 'ea', 'rs', ' a', 'go', ', ', 'th', 'er', 'e ', 'we', 're', ' i', 'n ', 'th', 'is', '\\nc', 'ou', 'nt', 'ry', ' n', 'ot', 'hi', 'ng', ' b', 'ut', ' s', 'ma', 'll', ' k', 'in', 'gs', ', ', 'an', 'd ', 'on', 'e ', 'of', ' t']\n",
      "\n",
      "Processing file stories\\073.txt\n",
      "Data size (Characters) (Document 72) 5980\n",
      "Sample string (Document 72) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' k', 'in', 'g ', 'wh', 'o ', 'ha', 'd ', 'an', ' i', 'll', 'ne', 'ss', ', ', 'an', 'd ', 'no', ' o', 'ne', ' b', 'el', 'ie', 've', 'd ', 'th', 'at', ' h', 'e\\n', 'wo', 'ul', 'd ', 'co', 'me', ' o', 'ut', ' o', 'f ', 'it', ' w', 'it', 'h ', 'hi', 's ']\n",
      "\n",
      "Processing file stories\\074.txt\n",
      "Data size (Characters) (Document 73) 4518\n",
      "Sample string (Document 73) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'wo', 'od', 'cu', 'tt', 'er', ' w', 'ho', ' t', 'oi', 'le', 'd ', 'fr', 'om', ' e', 'ar', 'ly', '\\nm', 'or', 'ni', 'ng', ' t', 'il', 'l ', 'la', 'te', ' a', 't ', 'ni', 'gh', 't.', '  ', 'wh', 'en', ' a', 't ', 'la', 'st', ' h', 'e ']\n",
      "\n",
      "Processing file stories\\075.txt\n",
      "Data size (Characters) (Document 74) 3247\n",
      "Sample string (Document 74) ['a ', 'di', 'sc', 'ha', 'rg', 'ed', ' s', 'ol', 'di', 'er', ' h', 'ad', ' n', 'ot', 'hi', 'ng', ' t', 'o ', 'li', 've', ' o', 'n,', ' a', 'nd', ' d', 'id', ' n', 'ot', ' k', 'no', 'w ', 'ho', 'w ', 'to', '\\nm', 'ak', 'e ', 'hi', 's ', 'wa', 'y.', '  ', 'so', ' h', 'e ', 'we', 'nt', ' o', 'ut', ' i']\n",
      "\n",
      "Processing file stories\\076.txt\n",
      "Data size (Characters) (Document 75) 5130\n",
      "Sample string (Document 75) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' y', 'ou', 'ng', ' f', 'el', 'lo', 'w ', 'wh', 'o ', 'en', 'li', 'st', 'ed', ' a', 's ', 'a ', 'so', 'ld', 'ie', 'r,', ' c', 'on', 'du', 'ct', 'ed', '\\nh', 'im', 'se', 'lf', ' b', 'ra', 've', 'ly', ', ', 'an', 'd ', 'wa', 's ', 'al', 'wa', 'ys', ' t']\n",
      "\n",
      "Processing file stories\\077.txt\n",
      "Data size (Characters) (Document 76) 2401\n",
      "Sample string (Document 76) ['on', 'ce', ' i', 'n ', 'su', 'mm', 'er', '-t', 'im', 'e ', 'th', 'e ', 'be', 'ar', ' a', 'nd', ' t', 'he', ' w', 'ol', 'f ', 'we', 're', ' w', 'al', 'ki', 'ng', ' i', 'n ', 'th', 'e ', 'fo', 're', 'st', ',\\n', 'an', 'd ', 'th', 'e ', 'be', 'ar', ' h', 'ea', 'rd', ' a', ' b', 'ir', 'd ', 'si', 'ng']\n",
      "\n",
      "Processing file stories\\078.txt\n",
      "Data size (Characters) (Document 77) 624\n",
      "Sample string (Document 77) ['th', 'er', 'e ', 'wa', 's ', 'a ', 'po', 'or', ' b', 'ut', ' g', 'oo', 'd ', 'li', 'tt', 'le', ' g', 'ir', 'l ', 'wh', 'o ', 'li', 've', 'd ', 'al', 'on', 'e ', 'wi', 'th', ' h', 'er', '\\nm', 'ot', 'he', 'r,', ' a', 'nd', ' t', 'he', 'y ', 'no', ' l', 'on', 'ge', 'r ', 'ha', 'd ', 'an', 'yt', 'hi']\n",
      "\n",
      "Processing file stories\\079.txt\n",
      "Data size (Characters) (Document 78) 3991\n",
      "Sample string (Document 78) ['on', 'e ', 'da', 'y ', 'a ', 'pe', 'as', 'an', 't ', 'to', 'ok', ' h', 'is', ' g', 'oo', 'd ', 'ha', 'ze', 'l-', 'st', 'ic', 'k ', 'ou', 't ', 'of', ' t', 'he', ' c', 'or', 'ne', 'r\\n', 'an', 'd ', 'sa', 'id', ' t', 'o ', 'hi', 's ', 'wi', 'fe', ', ', 'tr', 'in', 'a,', ' i', ' a', 'm ', 'go', 'in']\n",
      "\n",
      "Processing file stories\\080.txt\n",
      "Data size (Characters) (Document 79) 1426\n",
      "Sample string (Document 79) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' l', 'it', 'tl', 'e ', 'ch', 'il', 'd ', 'wh', 'os', 'e ', 'mo', 'th', 'er', ' g', 'av', 'e ', 'he', 'r ', 'ev', 'er', 'y\\n', 'af', 'te', 'rn', 'oo', 'n ', 'a ', 'sm', 'al', 'l ', 'bo', 'wl', ' o', 'f ', 'mi', 'lk', ' a', 'nd', ' b', 're', 'ad', ', ']\n",
      "\n",
      "Processing file stories\\081.txt\n",
      "Data size (Characters) (Document 80) 3574\n",
      "Sample string (Document 80) ['in', ' a', ' c', 'er', 'ta', 'in', ' m', 'il', 'l ', 'li', 've', 'd ', 'an', ' o', 'ld', ' m', 'il', 'le', 'r ', 'wh', 'o ', 'ha', 'd ', 'ne', 'it', 'he', 'r ', 'wi', 'fe', ' n', 'or', ' c', 'hi', 'ld', ',\\n', 'an', 'd ', 'th', 're', 'e ', 'ap', 'pr', 'en', 'ti', 'ce', 's ', 'se', 'rv', 'ed', ' u']\n",
      "\n",
      "Processing file stories\\082.txt\n",
      "Data size (Characters) (Document 81) 10822\n",
      "Sample string (Document 81) ['hi', 'll', ' a', 'nd', ' v', 'al', 'e ', 'do', ' n', 'ot', ' m', 'ee', 't,', ' b', 'ut', ' t', 'he', ' c', 'hi', 'ld', 're', 'n ', 'of', ' m', 'en', ' d', 'o,', ' g', 'oo', 'd ', 'an', 'd ', 'ba', 'd.', '\\ni', 'n ', 'th', 'is', ' w', 'ay', ' a', ' s', 'ho', 'em', 'ak', 'er', ' a', 'nd', ' a', ' t']\n",
      "\n",
      "Processing file stories\\083.txt\n",
      "Data size (Characters) (Document 82) 5480\n",
      "Sample string (Document 82) ['\\th', 'an', 's ', 'th', 'e ', 'he', 'dg', 'eh', 'og', '\\n\\n', 'th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' c', 'ou', 'nt', 'ry', ' m', 'an', ' w', 'ho', ' h', 'ad', ' m', 'on', 'ey', ' a', 'nd', ' l', 'an', 'd ', 'in', ' p', 'le', 'nt', 'y,', ' b', 'ut', '\\nh', 'ow', 'ev', 'er', ' r', 'ic', 'h ']\n",
      "\n",
      "Processing file stories\\084.txt\n",
      "Data size (Characters) (Document 83) 658\n",
      "Sample string (Document 83) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'ot', 'he', 'r ', 'wh', 'o ', 'ha', 'd ', 'a ', 'li', 'tt', 'le', ' b', 'oy', ' o', 'f ', 'se', 've', 'n ', 'ye', 'ar', 's ', 'ol', 'd,', ' w', 'ho', '\\nw', 'as', ' s', 'o ', 'ha', 'nd', 'so', 'me', ' a', 'nd', ' l', 'ov', 'ab', 'le', ' t', 'ha']\n",
      "\n",
      "Processing file stories\\085.txt\n",
      "Data size (Characters) (Document 84) 5989\n",
      "Sample string (Document 84) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' y', 'ou', 'ng', ' f', 'el', 'lo', 'w ', 'wh', 'o ', 'ha', 'd ', 'le', 'ar', 'nt', ' t', 'he', ' t', 'ra', 'de', ' o', 'f ', 'lo', 'ck', 'sm', 'it', 'h,', '\\na', 'nd', ' t', 'ol', 'd ', 'hi', 's ', 'fa', 'th', 'er', ' h', 'e ', 'wo', 'ul', 'd ', 'no']\n",
      "\n",
      "Processing file stories\\086.txt\n",
      "Data size (Characters) (Document 85) 8758\n",
      "Sample string (Document 85) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' k', 'in', 'g ', 'wh', 'o ', 'ha', 'd ', 'a ', 'li', 'tt', 'le', ' b', 'oy', ' i', 'n ', 'wh', 'os', 'e ', 'st', 'ar', 's\\n', 'it', ' h', 'ad', ' b', 'ee', 'n ', 'fo', 're', 'to', 'ld', ' t', 'ha', 't ', 'he', ' s']\n",
      "\n",
      "Processing file stories\\087.txt\n",
      "Data size (Characters) (Document 86) 3109\n",
      "Sample string (Document 86) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' a', ' p', 'ri', 'nc', 'es', 's ', 'wh', 'o ', 'wa', 's ', 'ex', 'tr', 'em', 'el', 'y ', 'pr', 'ou', 'd.', ' i', 'f ', 'a\\n', 'wo', 'oe', 'r ', 'ca', 'me', ' s', 'he', ' g', 'av', 'e ', 'hi', 'm ', 'so', 'me', ' r', 'id']\n",
      "\n",
      "Processing file stories\\088.txt\n",
      "Data size (Characters) (Document 87) 1365\n",
      "Sample string (Document 87) ['a ', 'ta', 'il', 'or', \"'s\", ' a', 'pp', 're', 'nt', 'ic', 'e ', 'wa', 's ', 'tr', 'av', 'el', 'in', 'g ', 'ab', 'ou', 't ', 'th', 'e ', 'wo', 'rl', 'd ', 'in', ' s', 'ea', 'rc', 'h ', 'of', '\\nw', 'or', 'k,', ' a', 'nd', ' a', 't ', 'on', 'e ', 'ti', 'me', ' h', 'e ', 'co', 'ul', 'd ', 'fi', 'nd']\n",
      "\n",
      "Processing file stories\\089.txt\n",
      "Data size (Characters) (Document 88) 4538\n",
      "Sample string (Document 88) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' o', 'n ', 'a ', 'ti', 'me', ' a', ' s', 'ol', 'di', 'er', ' w', 'ho', ' f', 'or', ' m', 'an', 'y ', 'ye', 'ar', 's ', 'ha', 'd ', 'se', 'rv', 'ed', ' t', 'he', '\\nk', 'in', 'g ', 'fa', 'it', 'hf', 'ul', 'ly', ', ', 'bu', 't ', 'wh', 'en', ' t', 'he', ' w']\n",
      "\n",
      "Processing file stories\\090.txt\n",
      "Data size (Characters) (Document 89) 345\n",
      "Sample string (Document 89) ['on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' t', 'he', 're', ' w', 'as', ' a', ' c', 'hi', 'ld', ' w', 'ho', ' w', 'as', ' w', 'il', 'lf', 'ul', ', ', 'an', 'd ', 'wo', 'ul', 'd ', 'no', 't ', 'do', '\\nw', 'ha', 't ', 'he', 'r ', 'mo', 'th', 'er', ' w', 'is', 'he', 'd.', '  ', 'fo', 'r ', 'th']\n",
      "\n",
      "Processing file stories\\091.txt\n",
      "Data size (Characters) (Document 90) 5460\n",
      "Sample string (Document 90) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' k', 'in', \"g'\", 's ', 'so', 'n,', ' w', 'ho', ' w', 'as', ' n', 'o ', 'lo', 'ng', 'er', ' c', 'on', 'te', 'nt', ' t', 'o ', 'st', 'ay', ' a', 't\\n', 'ho', 'me', ' i', 'n ', 'hi', 's ', 'fa', 'th', 'er', \"'s\", ' h', 'ou', 'se', ', ', 'an', 'd ', 'as']\n",
      "\n",
      "Processing file stories\\092.txt\n",
      "Data size (Characters) (Document 91) 6854\n",
      "Sample string (Document 91) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' y', 'ou', 'ng', ' h', 'un', 'ts', 'ma', 'n ', 'wh', 'o ', 'we', 'nt', ' i', 'nt', 'o ', 'th', 'e ', 'fo', 're', 'st', ' t', 'o ', 'li', 'e ', 'in', '\\nw', 'ai', 't.', '  ', 'he', ' h', 'ad', ' a', ' f', 're', 'sh', ' a', 'nd', ' j', 'oy', 'ou', 's ']\n",
      "\n",
      "Processing file stories\\093.txt\n",
      "Data size (Characters) (Document 92) 2314\n",
      "Sample string (Document 92) ['a ', 'po', 'or', ' s', 'er', 'va', 'nt', '-g', 'ir', 'l ', 'wa', 's ', 'on', 'ce', ' t', 'ra', 've', 'li', 'ng', ' w', 'it', 'h ', 'th', 'e ', 'fa', 'mi', 'ly', ' w', 'it', 'h ', 'wh', 'ic', 'h ', 'sh', 'e\\n', 'wa', 's ', 'in', ' s', 'er', 'vi', 'ce', ', ', 'th', 'ro', 'ug', 'h ', 'a ', 'gr', 'ea']\n",
      "\n",
      "Processing file stories\\094.txt\n",
      "Data size (Characters) (Document 93) 1706\n",
      "Sample string (Document 93) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' m', 'an', ' w', 'ho', ' h', 'ad', ' t', 'hr', 'ee', ' s', 'on', 's,', ' a', 'nd', ' n', 'ot', 'hi', 'ng', ' e', 'ls', 'e ', 'in', ' t', 'he', '\\nw', 'or', 'ld', ' b', 'ut', ' t', 'he', ' h', 'ou', 'se', ' i', 'n ', 'wh', 'ic', 'h ', 'he', ' l', 'iv']\n",
      "\n",
      "Processing file stories\\095.txt\n",
      "Data size (Characters) (Document 94) 3229\n",
      "Sample string (Document 94) ['th', 'er', 'e ', 'wa', 's ', 'a ', 'gr', 'ea', 't ', 'wa', 'r,', ' a', 'nd', ' t', 'he', ' k', 'in', 'g ', 'ha', 'd ', 'ma', 'ny', ' s', 'ol', 'di', 'er', 's,', ' b', 'ut', ' g', 'av', 'e ', 'th', 'em', '\\ns', 'ma', 'll', ' p', 'ay', ', ', 'so', ' s', 'ma', 'll', ' t', 'ha', 't ', 'th', 'ey', ' c']\n",
      "\n",
      "Processing file stories\\096.txt\n",
      "Data size (Characters) (Document 95) 4954\n",
      "Sample string (Document 95) ['on', 'ce', ' u', 'po', 'n ', 'a ', 'ti', 'me', ' l', 'iv', 'ed', ' a', ' m', 'an', ' a', 'nd', ' a', ' w', 'om', 'an', ' w', 'ho', ' s', 'o ', 'lo', 'ng', ' a', 's ', 'th', 'ey', ' w', 'er', 'e\\n', 'ri', 'ch', ' h', 'ad', ' n', 'o ', 'ch', 'il', 'dr', 'en', ', ', 'bu', 't ', 'wh', 'en', ' t', 'he']\n",
      "\n",
      "Processing file stories\\097.txt\n",
      "Data size (Characters) (Document 96) 5732\n",
      "Sample string (Document 96) ['in', ' t', 'he', ' d', 'ay', 's ', 'wh', 'en', ' w', 'is', 'hi', 'ng', ' w', 'as', ' s', 'ti', 'll', ' o', 'f ', 'so', 'me', ' u', 'se', ', ', 'a ', 'ki', 'ng', \"'s\", ' s', 'on', ' w', 'as', '\\nb', 'ew', 'it', 'ch', 'ed', ' b', 'y ', 'an', ' o', 'ld', ' w', 'it', 'ch', ', ', 'an', 'd ', 'sh', 'ut']\n",
      "\n",
      "Processing file stories\\098.txt\n",
      "Data size (Characters) (Document 97) 4334\n",
      "Sample string (Document 97) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' p', 'oo', 'r ', 'ma', 'n ', 'wh', 'o ', 'ha', 'd ', 'fo', 'ur', ' s', 'on', 's,', ' a', 'nd', ' w', 'he', 'n ', 'th', 'ey', ' w', 'er', 'e ', 'gr', 'ow', 'n\\n', 'up', ', ', 'he', ' s', 'ai', 'd ', 'to', ' t', 'he', 'm,', ' \"', 'my', ' d', 'ea', 'r ']\n",
      "\n",
      "Processing file stories\\099.txt\n",
      "Data size (Characters) (Document 98) 7090\n",
      "Sample string (Document 98) ['th', 'er', 'e ', 'wa', 's ', 'on', 'ce', ' a', ' w', 'om', 'an', ' w', 'ho', ' h', 'ad', ' t', 'hr', 'ee', ' d', 'au', 'gh', 'te', 'rs', ', ', 'th', 'e ', 'el', 'de', 'st', ' o', 'f ', 'wh', 'om', '\\nw', 'as', ' c', 'al', 'le', 'd ', 'on', 'e-', 'ey', 'e,', ' b', 'ec', 'au', 'se', ' s', 'he', ' h']\n",
      "\n",
      "Processing file stories\\100.txt\n",
      "Data size (Characters) (Document 99) 1007\n",
      "Sample string (Document 99) ['\"g', 'oo', 'd-', 'da', 'y,', ' f', 'at', 'he', 'r ', 'ho', 'll', 'en', 'th', 'e.', '\" ', '\"m', 'an', 'y ', 'th', 'an', 'ks', ', ', 'pi', 'f-', 'pa', 'f-', 'po', 'lt', 'ri', 'e.', '\" ', '\"m', 'ay', ' i', '\\nb', 'e ', 'al', 'lo', 'we', 'd ', 'to', ' h', 'av', 'e ', 'yo', 'ur', ' d', 'au', 'gh', 'te']\n"
     ]
    }
   ],
   "source": [
    "def read_data(filename):\n",
    "  \n",
    "  with open(filename) as f:\n",
    "    data = tf.compat.as_str(f.read())\n",
    "    data = data.lower()\n",
    "    data = list(data)\n",
    "  return data\n",
    "\n",
    "documents = []\n",
    "global documents\n",
    "for i in range(num_files):    \n",
    "    print('\\nProcessing file %s'%os.path.join(dir_name,filenames[i]))\n",
    "    chars = read_data(os.path.join(dir_name,filenames[i]))\n",
    "    two_grams = [''.join(chars[ch_i:ch_i+2]) for ch_i in range(0,len(chars)-2,2)]\n",
    "    documents.append(two_grams)\n",
    "    print('Data size (Characters) (Document %d) %d' %(i,len(two_grams)))\n",
    "    print('Sample string (Document %d) %s'%(i,two_grams[:50]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building the Dictionaries (Bigrams)\n",
    "Builds the following. To understand each of these elements, let us also assume the text \"I like to go to school\"\n",
    "\n",
    "* `dictionary`: maps a string word to an ID (e.g. {I:0, like:1, to:2, go:3, school:4})\n",
    "* `reverse_dictionary`: maps an ID to a string word (e.g. {0:I, 1:like, 2:to, 3:go, 4:school}\n",
    "* `count`: List of list of (word, frequency) elements (e.g. [(I,1),(like,1),(to,2),(go,1),(school,1)]\n",
    "* `data` : Contain the string of text we read, where string words are replaced with word IDs (e.g. [0, 1, 2, 3, 2, 4])\n",
    "\n",
    "It also introduces an additional special token `UNK` to denote rare words to are too rare to make use of."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "449177 Characters found.\n",
      "Most common words (+UNK) [('e ', 15229), ('he', 15164), (' t', 13443), ('th', 13076), ('d ', 10687)]\n",
      "Least common words (+UNK) [('\\tc', 1), ('iu', 1), ('zi', 1), ('yr', 1), (\"x'\", 1), (' z', 1), ('i?', 1), ('hc', 1), ('nm', 1), ('bj', 1), ('f?', 1), (\"'p\", 1), ('dh', 1), ('pk', 1), ('c.', 1)]\n",
      "Sample data [15, 28, 86, 23, 3, 95, 74, 11, 2, 16]\n",
      "Sample data [22, 156, 25, 37, 83, 185, 43, 9, 90, 19]\n",
      "Vocabulary:  544\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "def build_dataset(documents):\n",
    "    chars = []\n",
    "    # This is going to be a list of lists\n",
    "    # Where the outer list denote each document\n",
    "    # and the inner lists denote words in a given document\n",
    "    data_list = []\n",
    "  \n",
    "    for d in documents:\n",
    "        chars.extend(d)\n",
    "    print('%d Characters found.'%len(chars))\n",
    "    count = []\n",
    "    # Get the bigram sorted by their frequency (Highest comes first)\n",
    "    count.extend(collections.Counter(chars).most_common())\n",
    "    \n",
    "    # Create an ID for each bigram by giving the current length of the dictionary\n",
    "    # And adding that item to the dictionary\n",
    "    # Start with 'UNK' that is assigned to too rare words\n",
    "    dictionary = dict({'UNK':0})\n",
    "    for char, c in count:\n",
    "        # Only add a bigram to dictionary if its frequency is more than 10\n",
    "        if c > 10:\n",
    "            dictionary[char] = len(dictionary)    \n",
    "    \n",
    "    unk_count = 0\n",
    "    # Traverse through all the text we have\n",
    "    # to replace each string word with the ID of the word\n",
    "    for d in documents:\n",
    "        data = list()\n",
    "        for char in d:\n",
    "            # If word is in the dictionary use the word ID,\n",
    "            # else use the ID of the special token \"UNK\"\n",
    "            if char in dictionary:\n",
    "                index = dictionary[char]        \n",
    "            else:\n",
    "                index = dictionary['UNK']\n",
    "                unk_count += 1\n",
    "            data.append(index)\n",
    "            \n",
    "        data_list.append(data)\n",
    "        \n",
    "    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) \n",
    "    return data_list, count, dictionary, reverse_dictionary\n",
    "\n",
    "global data_list, count, dictionary, reverse_dictionary,vocabulary_size\n",
    "\n",
    "# Print some statistics about data\n",
    "data_list, count, dictionary, reverse_dictionary = build_dataset(documents)\n",
    "print('Most common words (+UNK)', count[:5])\n",
    "print('Least common words (+UNK)', count[-15:])\n",
    "print('Sample data', data_list[0][:10])\n",
    "print('Sample data', data_list[1][:10])\n",
    "print('Vocabulary: ',len(dictionary))\n",
    "vocabulary_size = len(dictionary)\n",
    "del documents  # To reduce memory."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating Batches of Data\n",
    "The following object generates a batch of data which will be used to train the RNN. More specifically the generator breaks a given sequence of words into `batch_size` segments. We also maintain a cursor for each segment. So whenever we create a batch of data, we sample one item from each segment and update the cursor of each segment. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "Unrolled index 0\n",
      "\tInputs:\n",
      "\te  (1), \tki (131), \t d (48), \t w (11), \tbe (70), \n",
      "\tOutput:\n",
      "\tli (98), \tng (33), \tau (195), \ter (14), \tau (195), \n",
      "\n",
      "Unrolled index 1\n",
      "\tInputs:\n",
      "\tli (98), \tng (33), \tau (195), \ter (14), \tau (195), \n",
      "\tOutput:\n",
      "\tve (41), \t\n",
      "w (169), \tgh (106), \te  (1), \tti (112), \n",
      "\n",
      "Unrolled index 2\n",
      "\tInputs:\n",
      "\tve (41), \t\n",
      "w (169), \tgh (106), \te  (1), \tti (112), \n",
      "\tOutput:\n",
      "\td  (5), \tho (62), \tte (61), \tal (84), \tfu (229), \n",
      "\n",
      "Unrolled index 3\n",
      "\tInputs:\n",
      "\td  (5), \tho (62), \tte (61), \tal (84), \tfu (229), \n",
      "\tOutput:\n",
      "\ta  (83), \tse (58), \trs (137), \tl  (56), \tl, (257), \n",
      "\n",
      "Unrolled index 4\n",
      "\tInputs:\n",
      "\ta  (83), \tse (58), \trs (137), \tl  (56), \tbe (70), \n",
      "\tOutput:\n",
      "\tki (131), \t d (48), \t w (11), \tbe (70), \tau (195), "
     ]
    }
   ],
   "source": [
    "class DataGeneratorOHE(object):\n",
    "    \n",
    "    def __init__(self,text,batch_size,num_unroll):\n",
    "        # Text where a bigram is denoted by its ID\n",
    "        self._text = text\n",
    "        # Number of bigrams in the text\n",
    "        self._text_size = len(self._text)\n",
    "        # Number of datapoints in a batch of data\n",
    "        self._batch_size = batch_size\n",
    "        # Num unroll is the number of steps we unroll the RNN in a single training step\n",
    "        # This relates to the truncated backpropagation we discuss in Chapter 6 text\n",
    "        self._num_unroll = num_unroll\n",
    "        # We break the text in to several segments and the batch of data is sampled by\n",
    "        # sampling a single item from a single segment\n",
    "        self._segments = self._text_size//self._batch_size\n",
    "        self._cursor = [offset * self._segments for offset in range(self._batch_size)]\n",
    "        \n",
    "    def next_batch(self):\n",
    "        '''\n",
    "        Generates a single batch of data\n",
    "        '''\n",
    "        # Train inputs (one-hot-encoded) and train outputs (one-hot-encoded)\n",
    "        batch_data = np.zeros((self._batch_size,vocabulary_size),dtype=np.float32)\n",
    "        batch_labels = np.zeros((self._batch_size,vocabulary_size),dtype=np.float32)\n",
    "        \n",
    "        # Fill in the batch datapoint by datapoint\n",
    "        for b in range(self._batch_size):\n",
    "            # If the cursor of a given segment exceeds the segment length\n",
    "            # we reset the cursor back to the beginning of that segment\n",
    "            if self._cursor[b]+1>=self._text_size:\n",
    "                self._cursor[b] = b * self._segments\n",
    "            \n",
    "            # Add the text at the cursor as the input\n",
    "            batch_data[b,self._text[self._cursor[b]]] = 1.0\n",
    "            # Add the preceding bigram as the label to be predicted\n",
    "            batch_labels[b,self._text[self._cursor[b]+1]]= 1.0                       \n",
    "            # Update the cursor\n",
    "            self._cursor[b] = (self._cursor[b]+1)%self._text_size\n",
    "                    \n",
    "        return batch_data,batch_labels\n",
    "        \n",
    "    def unroll_batches(self):\n",
    "        '''\n",
    "        This produces a list of num_unroll batches\n",
    "        as required by a single step of training of the RNN\n",
    "        '''\n",
    "        unroll_data,unroll_labels = [],[]\n",
    "        for ui in range(self._num_unroll):\n",
    "            data, labels = self.next_batch()            \n",
    "            unroll_data.append(data)\n",
    "            unroll_labels.append(labels)\n",
    "        \n",
    "        return unroll_data, unroll_labels\n",
    "    \n",
    "    def reset_indices(self):\n",
    "        '''\n",
    "        Used to reset all the cursors if needed\n",
    "        '''\n",
    "        self._cursor = [offset * self._segments for offset in range(self._batch_size)]\n",
    "        \n",
    "# Running a tiny set to see if things are correct\n",
    "dg = DataGeneratorOHE(data_list[0][25:50],5,5)\n",
    "u_data, u_labels = dg.unroll_batches()\n",
    "\n",
    "# Iterate through each data batch in the unrolled set of batches\n",
    "for ui,(dat,lbl) in enumerate(zip(u_data,u_labels)):   \n",
    "    print('\\n\\nUnrolled index %d'%ui)\n",
    "    dat_ind = np.argmax(dat,axis=1)\n",
    "    lbl_ind = np.argmax(lbl,axis=1)\n",
    "    print('\\tInputs:')\n",
    "    for sing_dat in dat_ind:\n",
    "        print('\\t%s (%d)'%(reverse_dictionary[sing_dat],sing_dat),end=\", \")\n",
    "    print('\\n\\tOutput:')\n",
    "    for sing_lbl in lbl_ind:        \n",
    "        print('\\t%s (%d)'%(reverse_dictionary[sing_lbl],sing_lbl),end=\", \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recurrent Neural Network\n",
    "Here we implement and train our recurrent model that will take an output a new story"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining Hyperparameters\n",
    "\n",
    "Here we define several hyperparameters required.\n",
    "* `num_unroll`: Number of steps we unroll over time during optimizing\n",
    "* `batch_size`: Number of datapoints in a single batch\n",
    "* `hidden_1`: Number of hidden neurons in the state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "num_unroll = 50\n",
    "batch_size = 64\n",
    "test_batch_size = 1\n",
    "\n",
    "hidden_sizes = [128,64,32]\n",
    "scopes = ['first','second','third']\n",
    "input_sizes = [vocabulary_size, 128, 64]\n",
    "out_size = vocabulary_size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining Inputs and Outputs\n",
    "Here we define training inputs (`train_dataset`) and outputs (`train_labels`), validation inputs (`valid_dataset`) and outputs (`valid_labels`) and test inputs (`test_dataset`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Train dataset\n",
    "# We use unrolling over time\n",
    "train_dataset, train_labels = [],[]\n",
    "for ui in range(num_unroll):\n",
    "    train_dataset.append(tf.placeholder(tf.float32, shape=[batch_size,input_sizes[0]],name='train_dataset_%d'%ui))\n",
    "    train_labels.append(tf.placeholder(tf.float32, shape=[batch_size,out_size],name='train_labels_%d'%ui))\n",
    "\n",
    "# Validation dataset    \n",
    "valid_dataset = tf.placeholder(tf.float32, shape=[1,input_sizes[0]],name='valid_dataset')\n",
    "valid_labels = tf.placeholder(tf.float32, shape=[1,out_size],name='valid_labels')\n",
    "\n",
    "# Test dataset\n",
    "test_dataset = tf.placeholder(tf.float32, shape=[test_batch_size,input_sizes[0]],name='save_test_dataset')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining Model Parameters and Other Variables\n",
    "Here we define model parameters. First we define two sets of weights (`W_xh` and `W_hh`) for each layer and a final output layer (`W_hy`). We also define a variable to maintain the hidden state. There needs to be three separate variables for the hidden state to be used during training(`train_h`), validation (`valid_h`) and testing (`test_h`) for each layer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Weights shape:  first /W_xh [544, 128]\n",
      "Weights shape:  first /W_hh [128, 128]\n",
      "Weights shape:  second /W_xh [128, 64]\n",
      "Weights shape:  second /W_hh [64, 64]\n",
      "Weights shape:  third /W_xh [64, 32]\n",
      "Weights shape:  third /W_hh [32, 32]\n"
     ]
    }
   ],
   "source": [
    "# We will use variable scoping to define variables in multi layer RNN\n",
    "\n",
    "for scope, h, i in zip(scopes,hidden_sizes, input_sizes):\n",
    "    with tf.variable_scope(scope):\n",
    "        print('Weights shape: ',scope,'/W_xh',[i,h])\n",
    "        print('Weights shape: ',scope,'/W_hh',[h,h])\n",
    "        # Weights between inputs and h1\n",
    "        tf.get_variable('W_xh', shape=[i, h], initializer=tf.truncated_normal_initializer(stddev=0.02))\n",
    "\n",
    "        # Weights between h1 and h1\n",
    "        tf.get_variable('W_hh',shape=[h,h], initializer = tf.truncated_normal_initializer(stddev=0.02))\n",
    "\n",
    "        # Maintain the previous state of hidden nodes in an un-trainable variable (Training data)\n",
    "        tf.get_variable('train_h',shape=[batch_size,h], initializer=tf.zeros_initializer(), trainable=False)\n",
    "        # Maintain the previous state of hidden nodes in an un-trainable variable (Validation data)\n",
    "        tf.get_variable('valid_h',shape=[1,h], initializer=tf.zeros_initializer(), trainable=False)\n",
    "        # Test state\n",
    "        tf.get_variable('test_h',shape=[1,h], initializer=tf.zeros_initializer(), trainable=False)\n",
    "        \n",
    "# Weights between last state and y\n",
    "with tf.variable_scope('out'):\n",
    "    tf.get_variable('W_hy',shape=[hidden_sizes[-1], out_size], initializer=tf.truncated_normal_initializer(stddev=0.02))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining Inference of the RNN\n",
    "This is the most crucial bit of RNN and what makes it different from feed forward networks. Here we define operations related to:\n",
    "* Define RNN computations as a function `rnn_cell`\n",
    "* Calculating training/validation/test hidden outputs\n",
    "* Calculating training/validation/test predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initial update to all the states\n",
      "\n",
      " Defined training stage RNN computations\n",
      "\n",
      " Defined training state update ops\n",
      "\n",
      " Definined training predictions\n",
      "\n",
      " Defined validation stage RNN computations\n",
      "\n",
      " Defined validation state update ops\n",
      "\n",
      " Definined validation predictions\n",
      "\n",
      " Defined testing stage RNN computations\n",
      "\n",
      " Defined testing state update ops\n",
      "\n",
      " Definined testing predictions\n"
     ]
    }
   ],
   "source": [
    "def rnn_cell(scope, x, h_minus_1):\n",
    "    '''\n",
    "    Define computations of the RNN cell\n",
    "    '''\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        W_xh, W_hh = tf.get_variable('W_xh'), tf.get_variable('W_hh') \n",
    "        h = tf.nn.tanh(tf.matmul(tf.concat([x, h_minus_1],1), tf.concat([W_xh, W_hh],0)))\n",
    "        \n",
    "        return h\n",
    "\n",
    "\n",
    "# ===============================================================================\n",
    "# Train score (unnormalized) values and predictions (normalized)\n",
    "y_scores, y_predictions = [],[]\n",
    "\n",
    "# Setting the initial state to get the current state of the RNN\n",
    "# training ,validation and testing phases\n",
    "next_state_h, next_valid_state_h, next_test_state_h = [],[],[]\n",
    "for scope in scopes:\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        next_state_h.append(tf.get_variable('train_h'))\n",
    "        next_valid_state_h.append(tf.get_variable('valid_h'))\n",
    "        next_test_state_h.append(tf.get_variable('test_h'))\n",
    "print('Initial update to all the states')\n",
    "\n",
    "# Maintains the last state output for all the layers\n",
    "last_state_unrolled = [] \n",
    "\n",
    "# Calculating the output of the RNN for num_unroll steps\n",
    "# (as required by the truncated BPTT)\n",
    "for ui in range(num_unroll):\n",
    "    x = train_dataset[ui]\n",
    "    for lyr_i, scope in enumerate(scopes):\n",
    "        # Recursively compute the RNN output\n",
    "        next_state_h[lyr_i] = rnn_cell(scope, x, next_state_h[lyr_i])\n",
    "        # Set the previous layer's output as the next layers input\n",
    "        x = next_state_h[lyr_i]\n",
    "            \n",
    "    last_state_unrolled.append(x)\n",
    "print('\\n Defined training stage RNN computations')\n",
    "\n",
    "# Updating the state variables with the latest state output at Training phase\n",
    "tf_train_state_update_ops = []\n",
    "for lyr_i, scope in enumerate(scopes):\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        tf_train_state_update_ops.append(\n",
    "            tf.assign(tf.get_variable('train_h'),next_state_h[lyr_i])\n",
    "        )\n",
    "print('\\n Defined training state update ops')\n",
    "\n",
    "with tf.variable_scope('out',reuse=True):\n",
    "    W_hy = tf.get_variable('W_hy')\n",
    "    # Get the scores and predictions for all the RNN outputs we produced for num_unroll steps\n",
    "    y_scores = [tf.matmul(last_state_unrolled[ui],W_hy) for ui in range(num_unroll)]\n",
    "    y_predictions = [tf.nn.softmax(y_scores[ui]) for ui in range(num_unroll)]\n",
    "    \n",
    "    # We calculate train perplexity with the predictions made by the RNN\n",
    "    train_perplexity_without_exp = tf.reduce_sum(\n",
    "        tf.concat(train_labels,0)*-tf.log(tf.concat(y_predictions,0)+1e-10))/(num_unroll*batch_size)\n",
    "print('\\n Definined training predictions')\n",
    "\n",
    "# ===============================================================================\n",
    "# Validation data related inference logic \n",
    "# (very similar to the training inference logic)\n",
    "\n",
    "# Compute the next valid state (only for 1 step)\n",
    "x = valid_dataset\n",
    "last_valid_state = None\n",
    "for lyr_i, scope in enumerate(scopes):\n",
    "    # Recursively compute the RNN output (validation)\n",
    "    next_valid_state_h[lyr_i] = rnn_cell(scope, x, next_valid_state_h[lyr_i])\n",
    "    # Set the previous layer's output as the next layers input (validation)\n",
    "    x = next_valid_state_h[lyr_i]\n",
    "    \n",
    "last_valid_state = next_valid_state_h[-1]\n",
    "print('\\n Defined validation stage RNN computations')\n",
    "\n",
    "# Updating the state variables with the latest state output at validation phase\n",
    "tf_valid_state_update_ops = []\n",
    "for lyr_i, scope in enumerate(scopes):\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        tf_valid_state_update_ops.append(\n",
    "            tf.assign(tf.get_variable('valid_h'),next_valid_state_h[lyr_i])\n",
    "        )\n",
    "print('\\n Defined validation state update ops')\n",
    "\n",
    "with tf.control_dependencies(tf_valid_state_update_ops):\n",
    "    with tf.variable_scope('out',reuse=True):\n",
    "        W_hy = tf.get_variable('W_hy')\n",
    "        valid_scores = tf.matmul(last_valid_state,W_hy) \n",
    "        valid_predictions = tf.nn.softmax(valid_scores)\n",
    "\n",
    "valid_perplexity_without_exp = tf.reduce_sum(tf.concat(valid_labels,0)*-tf.log(tf.concat(valid_predictions,0)+1e-10))\n",
    "print('\\n Definined validation predictions')\n",
    "# ===============================================================================\n",
    "# Test data realted inference logic\n",
    "\n",
    "# Calculating hidden output for test data\n",
    "x = test_dataset\n",
    "last_test_state = None\n",
    "for lyr_i, scope in enumerate(scopes):\n",
    "    # Recursively compute the RNN output (test)\n",
    "    next_test_state_h[lyr_i] = rnn_cell(scope, x, next_test_state_h[lyr_i])\n",
    "    # Set the previous layer's output as the next layers input (test)\n",
    "    x = next_test_state_h[lyr_i]\n",
    "    \n",
    "last_test_state = next_test_state_h[-1]\n",
    "print('\\n Defined testing stage RNN computations')\n",
    "\n",
    "# Updating the state variables with the latest state output at test phase\n",
    "tf_test_state_update_ops = []\n",
    "for lyr_i, scope in enumerate(scopes):\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        tf_test_state_update_ops.append(\n",
    "            tf.assign(tf.get_variable('test_h'),next_test_state_h[lyr_i])\n",
    "        )\n",
    "print('\\n Defined testing state update ops')\n",
    "\n",
    "with tf.control_dependencies(tf_test_state_update_ops):\n",
    "    with tf.variable_scope('out',reuse=True):\n",
    "        W_hy = tf.get_variable('W_hy')\n",
    "        test_scores = tf.matmul(last_test_state,W_hy) \n",
    "        test_predictions = tf.nn.softmax(test_scores)\n",
    "        \n",
    "print('\\n Definined testing predictions')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calculating RNN Loss\n",
    "We calculate the training and validation loss of RNN here. It's a typical cross entropy loss calculated over all the scores we obtained for training data (`rnn_loss`) and validation data (`rnn_valid_loss`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with tf.control_dependencies(tf_train_state_update_ops):\n",
    "    rnn_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n",
    "        logits=tf.concat(y_scores,0), labels=tf.concat(train_labels,0)\n",
    "    ))\n",
    "\n",
    "rnn_valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\n",
    "       logits=valid_scores, labels=valid_labels))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining Learning Rate and the Optimizer with Gradient Clipping\n",
    "Here we define the learning rate and the optimizer we're going to use. We will be using the Adam optimizer as it is one of the best optimizers out there. Furthermore we use gradient clipping to prevent any gradient explosions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "rnn_optimizer = tf.train.AdamOptimizer(learning_rate=0.0005)\n",
    "\n",
    "gradients, v = zip(*rnn_optimizer.compute_gradients(rnn_loss))\n",
    "gradients, _ = tf.clip_by_global_norm(gradients, 5.0)\n",
    "rnn_optimizer = rnn_optimizer.apply_gradients(zip(gradients, v))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Resetting Operations for Resetting Hidden States\n",
    "Sometimes the state variable needs to be reset (e.g. when starting predictions at a beginning of a new epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "training_reset_ops, valid_reset_ops, test_reset_ops = [],[],[]\n",
    "for lyr_i, (scope,h) in enumerate(zip(scopes,hidden_sizes)):\n",
    "    with tf.variable_scope(scope, reuse=True):\n",
    "        training_reset_ops.append(tf.assign(tf.get_variable('train_h'),tf.zeros([batch_size,h],dtype=tf.float32)))\n",
    "        valid_reset_ops.append(tf.assign(tf.get_variable('valid_h'),tf.zeros([1,h],dtype=tf.float32)))\n",
    "        test_reset_ops.append(tf.assign(tf.get_variable('test_h'),tf.zeros([1,h],dtype=tf.float32)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prediction Sampling\n",
    "We select the word corresponding to the highest index of the prediction vector. We will later see different sampling strategies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sample(distribution):\n",
    "  '''\n",
    "  Sample a word from the prediction distribution\n",
    "  '''  \n",
    "  best_idx = np.argmax(distribution)\n",
    "  return best_idx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running the RNN to Generate Text\n",
    "\n",
    "Here we train the RNN on the available data and generate text using the trained RNN for several steps. First we create a validation set by extracting text snippets (that are not present in training data) from longer documents. Then at each training step, we train the RNN on several randomly picked documents. From each document we extract text for `steps_per_document` steps. We also report the train and validation perplexities at the end of each step. Finally we test the RNN by asking it to generate some new text starting from a randomly picked bigram."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initialized\n",
      "\n",
      "\n",
      "Document 52 Step 1 processed (Perplexity: 318.15).\n",
      "Document 10 Step 1 processed (Perplexity: 175.25).\n",
      "Document 53 Step 1 processed (Perplexity: 165.81).\n",
      "Document 49 Step 1 processed (Perplexity: 179.31).\n",
      "Document 88 Step 1 processed (Perplexity: 188.42).\n",
      "Document 27 Step 1 processed (Perplexity: 178.92).\n",
      "Document 40 Step 1 processed (Perplexity: 178.10).\n",
      "Document 39 Step 1 processed (Perplexity: 155.19).\n",
      "Document 96 Step 1 processed (Perplexity: 175.38).\n",
      "Document 9 Step 1 processed (Perplexity: 169.69).\n",
      "Average loss at step 1: 1.043643\n",
      "\tPerplexity at step 1: 2.839544\n",
      "\n",
      "Valid Perplexity: 200.34\n",
      "\n",
      "Generated Text after epoch 0 ... \n",
      "======================== New text Segment ==========================\n",
      "\t skhehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehe\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 39 Step 2 processed (Perplexity: 153.02).\n",
      "Document 75 Step 2 processed (Perplexity: 185.40).\n",
      "Document 55 Step 2 processed (Perplexity: 168.58).\n",
      "Document 12 Step 2 processed (Perplexity: 165.92).\n",
      "Document 8 Step 2 processed (Perplexity: 175.30).\n",
      "Document 53 Step 2 processed (Perplexity: 156.91).\n",
      "Document 6 Step 2 processed (Perplexity: 177.62).\n",
      "Document 18 Step 2 processed (Perplexity: 155.48).\n",
      "Document 96 Step 2 processed (Perplexity: 171.99).\n",
      "Document 46 Step 2 processed (Perplexity: 176.05).\n",
      "\n",
      "\n",
      "Document 17 Step 3 processed (Perplexity: 161.30).\n",
      "Document 69 Step 3 processed (Perplexity: 166.28).\n",
      "Document 62 Step 3 processed (Perplexity: 154.70).\n",
      "Document 24 Step 3 processed (Perplexity: 166.74).\n",
      "Document 79 Step 3 processed (Perplexity: 154.53).\n",
      "Document 25 Step 3 processed (Perplexity: 170.66).\n",
      "Document 33 Step 3 processed (Perplexity: 186.43).\n",
      "Document 27 Step 3 processed (Perplexity: 175.84).\n",
      "Document 58 Step 3 processed (Perplexity: 122.71).\n",
      "Document 14 Step 3 processed (Perplexity: 186.91).\n",
      "\n",
      "\n",
      "Document 25 Step 4 processed (Perplexity: 159.13).\n",
      "Document 49 Step 4 processed (Perplexity: 175.69).\n",
      "Document 23 Step 4 processed (Perplexity: 152.51).\n",
      "Document 42 Step 4 processed (Perplexity: 194.41).\n",
      "Document 46 Step 4 processed (Perplexity: 172.34).\n",
      "Document 65 Step 4 processed (Perplexity: 171.12).\n",
      "Document 97 Step 4 processed (Perplexity: 181.99).\n",
      "Document 79 Step 4 processed (Perplexity: 153.80).\n",
      "Document 24 Step 4 processed (Perplexity: 169.03).\n",
      "Document 32 Step 4 processed (Perplexity: 180.31).\n",
      "\n",
      "\n",
      "Document 85 Step 5 processed (Perplexity: 175.42).\n",
      "Document 82 Step 5 processed (Perplexity: 159.98).\n",
      "Document 81 Step 5 processed (Perplexity: 193.31).\n",
      "Document 56 Step 5 processed (Perplexity: 163.46).\n",
      "Document 88 Step 5 processed (Perplexity: 192.39).\n",
      "Document 97 Step 5 processed (Perplexity: 179.75).\n",
      "Document 48 Step 5 processed (Perplexity: 166.41).\n",
      "Document 72 Step 5 processed (Perplexity: 169.78).\n",
      "Document 68 Step 5 processed (Perplexity: 170.44).\n",
      "Document 80 Step 5 processed (Perplexity: 160.52).\n",
      "\n",
      "\n",
      "Document 20 Step 6 processed (Perplexity: 164.61).\n",
      "Document 29 Step 6 processed (Perplexity: 175.95).\n",
      "Document 19 Step 6 processed (Perplexity: 183.77).\n",
      "Document 9 Step 6 processed (Perplexity: 169.58).\n",
      "Document 21 Step 6 processed (Perplexity: 175.21).\n",
      "Document 85 Step 6 processed (Perplexity: 169.57).\n",
      "Document 89 Step 6 processed (Perplexity: 95.61).\n",
      "Document 88 Step 6 processed (Perplexity: 208.01).\n",
      "Document 47 Step 6 processed (Perplexity: 163.89).\n",
      "Document 63 Step 6 processed (Perplexity: 179.20).\n",
      "Average loss at step 6: 5.124974\n",
      "\tPerplexity at step 6: 168.169783\n",
      "\n",
      "Valid Perplexity: 190.17\n",
      "\n",
      "Generated Text after epoch 5 ... \n",
      "======================== New text Segment ==========================\n",
      "\t inhehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehehe\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 95 Step 7 processed (Perplexity: 159.99).\n",
      "Document 8 Step 7 processed (Perplexity: 176.92).\n",
      "Document 82 Step 7 processed (Perplexity: 161.16).\n",
      "Document 97 Step 7 processed (Perplexity: 179.43).\n",
      "Document 6 Step 7 processed (Perplexity: 170.28).\n",
      "Document 38 Step 7 processed (Perplexity: 164.61).\n",
      "Document 40 Step 7 processed (Perplexity: 179.78).\n",
      "Document 43 Step 7 processed (Perplexity: 166.76).\n",
      "Document 63 Step 7 processed (Perplexity: 175.29).\n",
      "Document 45 Step 7 processed (Perplexity: 172.37).\n",
      "\n",
      "\n",
      "Document 96 Step 8 processed (Perplexity: 161.35).\n",
      "Document 22 Step 8 processed (Perplexity: 130.30).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 30 Step 8 processed (Perplexity: 93.19).\n",
      "Document 15 Step 8 processed (Perplexity: 116.53).\n",
      "Document 79 Step 8 processed (Perplexity: 73.08).\n",
      "Document 63 Step 8 processed (Perplexity: 105.19).\n",
      "Document 89 Step 8 processed (Perplexity: 30.44).\n",
      "Document 85 Step 8 processed (Perplexity: 88.94).\n",
      "Document 36 Step 8 processed (Perplexity: 57.04).\n",
      "Document 90 Step 8 processed (Perplexity: 64.96).\n",
      "\n",
      "\n",
      "Document 41 Step 9 processed (Perplexity: 62.62).\n",
      "Document 0 Step 9 processed (Perplexity: 51.44).\n",
      "Document 53 Step 9 processed (Perplexity: 36.22).\n",
      "Document 27 Step 9 processed (Perplexity: 51.91).\n",
      "Document 60 Step 9 processed (Perplexity: 54.19).\n",
      "Document 95 Step 9 processed (Perplexity: 34.04).\n",
      "Document 71 Step 9 processed (Perplexity: 30.20).\n",
      "Document 44 Step 9 processed (Perplexity: 36.32).\n",
      "Document 75 Step 9 processed (Perplexity: 37.82).\n",
      "Document 33 Step 9 processed (Perplexity: 36.24).\n",
      "\n",
      "\n",
      "Document 37 Step 10 processed (Perplexity: 32.28).\n",
      "Document 39 Step 10 processed (Perplexity: 22.02).\n",
      "Document 36 Step 10 processed (Perplexity: 23.14).\n",
      "Document 91 Step 10 processed (Perplexity: 38.09).\n",
      "Document 30 Step 10 processed (Perplexity: 18.62).\n",
      "Document 78 Step 10 processed (Perplexity: 30.95).\n",
      "Document 44 Step 10 processed (Perplexity: 24.82).\n",
      "Document 56 Step 10 processed (Perplexity: 17.06).\n",
      "Document 43 Step 10 processed (Perplexity: 26.20).\n",
      "Document 21 Step 10 processed (Perplexity: 29.26).\n",
      "\n",
      "\n",
      "Document 17 Step 11 processed (Perplexity: 22.54).\n",
      "Document 81 Step 11 processed (Perplexity: 37.28).\n",
      "Document 32 Step 11 processed (Perplexity: 24.16).\n",
      "Document 34 Step 11 processed (Perplexity: 24.52).\n",
      "Document 10 Step 11 processed (Perplexity: 17.63).\n",
      "Document 13 Step 11 processed (Perplexity: 25.90).\n",
      "Document 71 Step 11 processed (Perplexity: 16.77).\n",
      "Document 35 Step 11 processed (Perplexity: 27.35).\n",
      "Document 88 Step 11 processed (Perplexity: 24.62).\n",
      "Document 47 Step 11 processed (Perplexity: 18.51).\n",
      "Average loss at step 11: 3.938997\n",
      "\tPerplexity at step 11: 51.367031\n",
      "\n",
      "Valid Perplexity: 63.90\n",
      "\n",
      "Generated Text after epoch 10 ... \n",
      "======================== New text Segment ==========================\n",
      "\t he and the drist an the more thears beon, and of the was to the more thear was to the most, the more thear was to the most on the more theet the brist the king's the more and the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said the that the said \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 28 Step 12 processed (Perplexity: 14.79).\n",
      "Document 54 Step 12 processed (Perplexity: 20.86).\n",
      "Document 72 Step 12 processed (Perplexity: 22.29).\n",
      "Document 88 Step 12 processed (Perplexity: 17.45).\n",
      "Document 50 Step 12 processed (Perplexity: 21.51).\n",
      "Document 46 Step 12 processed (Perplexity: 28.23).\n",
      "Document 57 Step 12 processed (Perplexity: 19.77).\n",
      "Document 69 Step 12 processed (Perplexity: 18.59).\n",
      "Document 74 Step 12 processed (Perplexity: 17.07).\n",
      "Document 6 Step 12 processed (Perplexity: 21.09).\n",
      "\n",
      "\n",
      "Document 64 Step 13 processed (Perplexity: 20.05).\n",
      "Document 88 Step 13 processed (Perplexity: 15.01).\n",
      "Document 82 Step 13 processed (Perplexity: 17.59).\n",
      "Document 51 Step 13 processed (Perplexity: 8.94).\n",
      "Document 60 Step 13 processed (Perplexity: 28.99).\n",
      "Document 43 Step 13 processed (Perplexity: 14.65).\n",
      "Document 3 Step 13 processed (Perplexity: 16.68).\n",
      "Document 49 Step 13 processed (Perplexity: 16.99).\n",
      "Document 28 Step 13 processed (Perplexity: 9.33).\n",
      "Document 23 Step 13 processed (Perplexity: 12.40).\n",
      "\n",
      "\n",
      "Document 60 Step 14 processed (Perplexity: 24.62).\n",
      "Document 21 Step 14 processed (Perplexity: 19.01).\n",
      "Document 57 Step 14 processed (Perplexity: 16.34).\n",
      "Document 90 Step 14 processed (Perplexity: 18.94).\n",
      "Document 91 Step 14 processed (Perplexity: 20.81).\n",
      "Document 32 Step 14 processed (Perplexity: 15.22).\n",
      "Document 69 Step 14 processed (Perplexity: 15.31).\n",
      "Document 27 Step 14 processed (Perplexity: 20.41).\n",
      "Document 1 Step 14 processed (Perplexity: 19.47).\n",
      "Document 19 Step 14 processed (Perplexity: 15.95).\n",
      "\n",
      "\n",
      "Document 45 Step 15 processed (Perplexity: 18.73).\n",
      "Document 29 Step 15 processed (Perplexity: 15.53).\n",
      "Document 30 Step 15 processed (Perplexity: 10.16).\n",
      "Document 53 Step 15 processed (Perplexity: 11.17).\n",
      "Document 15 Step 15 processed (Perplexity: 21.10).\n",
      "Document 44 Step 15 processed (Perplexity: 15.12).\n",
      "Document 76 Step 15 processed (Perplexity: 15.86).\n",
      "Document 8 Step 15 processed (Perplexity: 18.45).\n",
      "Document 88 Step 15 processed (Perplexity: 15.69).\n",
      "Document 33 Step 15 processed (Perplexity: 17.87).\n",
      "\n",
      "\n",
      "Document 66 Step 16 processed (Perplexity: 20.28).\n",
      "Document 1 Step 16 processed (Perplexity: 15.08).\n",
      "Document 72 Step 16 processed (Perplexity: 15.81).\n",
      "Document 69 Step 16 processed (Perplexity: 12.79).\n",
      "Document 52 Step 16 processed (Perplexity: 10.87).\n",
      "Document 45 Step 16 processed (Perplexity: 15.72).\n",
      "Document 98 Step 16 processed (Perplexity: 19.65).\n",
      "Document 3 Step 16 processed (Perplexity: 12.68).\n",
      "Document 61 Step 16 processed (Perplexity: 20.68).\n",
      "Document 75 Step 16 processed (Perplexity: 16.39).\n",
      "Average loss at step 16: 2.824752\n",
      "\tPerplexity at step 16: 16.856756\n",
      "\n",
      "Valid Perplexity: 45.16\n",
      "\n",
      "Generated Text after epoch 15 ... \n",
      "======================== New text Segment ==========================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t and when she will of the bride, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, and said, \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 49 Step 17 processed (Perplexity: 12.97).\n",
      "Document 21 Step 17 processed (Perplexity: 15.48).\n",
      "Document 36 Step 17 processed (Perplexity: 11.38).\n",
      "Document 81 Step 17 processed (Perplexity: 25.30).\n",
      "Document 13 Step 17 processed (Perplexity: 14.30).\n",
      "Document 83 Step 17 processed (Perplexity: 6.25).\n",
      "Document 15 Step 17 processed (Perplexity: 16.69).\n",
      "Document 50 Step 17 processed (Perplexity: 14.37).\n",
      "Document 55 Step 17 processed (Perplexity: 15.60).\n",
      "Document 0 Step 17 processed (Perplexity: 13.33).\n",
      "\n",
      "\n",
      "Document 57 Step 18 processed (Perplexity: 13.40).\n",
      "Document 37 Step 18 processed (Perplexity: 14.39).\n",
      "Document 90 Step 18 processed (Perplexity: 14.35).\n",
      "Document 28 Step 18 processed (Perplexity: 7.54).\n",
      "Document 25 Step 18 processed (Perplexity: 12.52).\n",
      "Document 74 Step 18 processed (Perplexity: 12.10).\n",
      "Document 22 Step 18 processed (Perplexity: 15.12).\n",
      "Document 83 Step 18 processed (Perplexity: 4.48).\n",
      "Document 44 Step 18 processed (Perplexity: 13.96).\n",
      "Document 71 Step 18 processed (Perplexity: 10.64).\n",
      "\n",
      "\n",
      "Document 12 Step 19 processed (Perplexity: 13.52).\n",
      "Document 88 Step 19 processed (Perplexity: 14.07).\n",
      "Document 28 Step 19 processed (Perplexity: 5.76).\n",
      "Document 57 Step 19 processed (Perplexity: 11.63).\n",
      "Document 25 Step 19 processed (Perplexity: 9.81).\n",
      "Document 26 Step 19 processed (Perplexity: 24.52).\n",
      "Document 54 Step 19 processed (Perplexity: 11.68).\n",
      "Document 59 Step 19 processed (Perplexity: 7.45).\n",
      "Document 40 Step 19 processed (Perplexity: 17.09).\n",
      "Document 29 Step 19 processed (Perplexity: 13.10).\n",
      "\n",
      "\n",
      "Document 47 Step 20 processed (Perplexity: 11.16).\n",
      "Document 29 Step 20 processed (Perplexity: 7.33).\n",
      "Document 42 Step 20 processed (Perplexity: 19.03).\n",
      "Document 95 Step 20 processed (Perplexity: 12.72).\n",
      "Document 16 Step 20 processed (Perplexity: 11.54).\n",
      "Document 48 Step 20 processed (Perplexity: 11.36).\n",
      "Document 56 Step 20 processed (Perplexity: 9.28).\n",
      "Document 81 Step 20 processed (Perplexity: 24.80).\n",
      "Document 62 Step 20 processed (Perplexity: 9.96).\n",
      "Document 35 Step 20 processed (Perplexity: 19.20).\n",
      "\n",
      "\n",
      "Document 37 Step 21 processed (Perplexity: 14.28).\n",
      "Document 28 Step 21 processed (Perplexity: 5.98).\n",
      "Document 44 Step 21 processed (Perplexity: 11.99).\n",
      "Document 12 Step 21 processed (Perplexity: 12.42).\n",
      "Document 46 Step 21 processed (Perplexity: 22.21).\n",
      "Document 10 Step 21 processed (Perplexity: 10.34).\n",
      "Document 7 Step 21 processed (Perplexity: 15.30).\n",
      "Document 6 Step 21 processed (Perplexity: 14.39).\n",
      "Document 85 Step 21 processed (Perplexity: 13.92).\n",
      "Document 60 Step 21 processed (Perplexity: 19.46).\n",
      "Average loss at step 21: 2.532281\n",
      "\tPerplexity at step 21: 12.582178\n",
      "\n",
      "Valid Perplexity: 33.24\n",
      "\n",
      "Generated Text after epoch 20 ... \n",
      "======================== New text Segment ==========================\n",
      "\t they said her.  then he thather and he was heaved in the king's daughter, and that they noters with her ofld have have in the have in her were not let it away, and that is said that he was to the king and then the king's lanves, and said, \"now well, and the brother luched, and that he had the still that he have nothing and said the brought that he had the still became to the brought that he have nothing and said the brought that he haves haves her not no the tout the king, and he was have the king and to the still of in the brother lucked, and that they noters with her ofld have have in the have in her were not let it away, and that is said that he was to the king and then the king's lanves, and said, \"now well, and the brother luched, and that he had the still that he have nothing and said the brought that he had the still became to the brought that he have nothing and said the brought that he haves haves her not no the tout the king, and he was have the king and to the still of in the brother lucked, and that they noters with her ofld have have in the have in her were not let it away, and that is said that he was to the king and then the king's lanves, and said, \"now well, and the brother luched, and that he had the still that he have nothing and said the brought that he had the still became to the brought that he have nothing and said the brought that he haves haves her not no the tout the king, and he was have the king and to the still of in the brother lucked, and that they noters with her ofld have have in the have in her were not let it away, and that is said that he was to the king and then the king's lanves, and said, \"now well, and the brother luched, and that he had the still that he have nothing and said the brought that he had the still became to the brought that he have nothing and said the brought that he haves haves her not no the tout the king, and he was have the king and to the still of in the brother lucked, and that they noters with her ofld have \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 53 Step 22 processed (Perplexity: 8.52).\n",
      "Document 51 Step 22 processed (Perplexity: 5.75).\n",
      "Document 73 Step 22 processed (Perplexity: 18.97).\n",
      "Document 29 Step 22 processed (Perplexity: 11.29).\n",
      "Document 87 Step 22 processed (Perplexity: 8.28).\n",
      "Document 96 Step 22 processed (Perplexity: 14.24).\n",
      "Document 23 Step 22 processed (Perplexity: 9.28).\n",
      "Document 70 Step 22 processed (Perplexity: 13.01).\n",
      "Document 79 Step 22 processed (Perplexity: 10.26).\n",
      "Document 42 Step 22 processed (Perplexity: 17.44).\n",
      "\n",
      "\n",
      "Document 39 Step 23 processed (Perplexity: 9.23).\n",
      "Document 79 Step 23 processed (Perplexity: 5.06).\n",
      "Document 90 Step 23 processed (Perplexity: 14.97).\n",
      "Document 83 Step 23 processed (Perplexity: 4.79).\n",
      "Document 4 Step 23 processed (Perplexity: 23.00).\n",
      "Document 19 Step 23 processed (Perplexity: 13.08).\n",
      "Document 25 Step 23 processed (Perplexity: 10.86).\n",
      "Document 52 Step 23 processed (Perplexity: 9.32).\n",
      "Document 69 Step 23 processed (Perplexity: 12.39).\n",
      "Document 87 Step 23 processed (Perplexity: 6.80).\n",
      "\n",
      "\n",
      "Document 26 Step 24 processed (Perplexity: 22.20).\n",
      "Document 71 Step 24 processed (Perplexity: 9.74).\n",
      "Document 34 Step 24 processed (Perplexity: 12.40).\n",
      "Document 46 Step 24 processed (Perplexity: 22.18).\n",
      "Document 84 Step 24 processed (Perplexity: 12.91).\n",
      "Document 19 Step 24 processed (Perplexity: 9.87).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 30 Step 24 processed (Perplexity: 8.60).\n",
      "Document 45 Step 24 processed (Perplexity: 15.38).\n",
      "Document 65 Step 24 processed (Perplexity: 15.22).\n",
      "Document 35 Step 24 processed (Perplexity: 14.70).\n",
      "\n",
      "\n",
      "Document 68 Step 25 processed (Perplexity: 15.05).\n",
      "Document 19 Step 25 processed (Perplexity: 8.17).\n",
      "Document 55 Step 25 processed (Perplexity: 13.77).\n",
      "Document 95 Step 25 processed (Perplexity: 10.84).\n",
      "Document 53 Step 25 processed (Perplexity: 7.59).\n",
      "Document 47 Step 25 processed (Perplexity: 10.44).\n",
      "Document 83 Step 25 processed (Perplexity: 4.26).\n",
      "Document 89 Step 25 processed (Perplexity: 3.66).\n",
      "Document 77 Step 25 processed (Perplexity: 5.55).\n",
      "Document 18 Step 25 processed (Perplexity: 11.96).\n",
      "\n",
      "\n",
      "Document 67 Step 26 processed (Perplexity: 13.57).\n",
      "Document 20 Step 26 processed (Perplexity: 10.71).\n",
      "Document 85 Step 26 processed (Perplexity: 13.49).\n",
      "Document 55 Step 26 processed (Perplexity: 12.17).\n",
      "Document 28 Step 26 processed (Perplexity: 6.47).\n",
      "Document 27 Step 26 processed (Perplexity: 16.48).\n",
      "Document 14 Step 26 processed (Perplexity: 18.33).\n",
      "Document 29 Step 26 processed (Perplexity: 11.32).\n",
      "Document 52 Step 26 processed (Perplexity: 8.13).\n",
      "Document 63 Step 26 processed (Perplexity: 13.78).\n",
      "Average loss at step 26: 2.379086\n",
      "\tPerplexity at step 26: 10.795035\n",
      "\n",
      "Valid Perplexity: 54.00\n",
      "\n",
      "Generated Text after epoch 25 ... \n",
      "======================== New text Segment ==========================\n",
      "\t mother he he will the great happed his heart.  then the great han he therepomeed his old said, and said, and he said he her lighde he was brought her like she gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he had gold, and said, and he said to there, and they said, and he was so the great han he\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 8 Step 27 processed (Perplexity: 13.85).\n",
      "Document 46 Step 27 processed (Perplexity: 20.83).\n",
      "Document 65 Step 27 processed (Perplexity: 13.07).\n",
      "Document 73 Step 27 processed (Perplexity: 15.10).\n",
      "Document 33 Step 27 processed (Perplexity: 14.13).\n",
      "Document 83 Step 27 processed (Perplexity: 3.99).\n",
      "Document 96 Step 27 processed (Perplexity: 13.10).\n",
      "Document 37 Step 27 processed (Perplexity: 12.52).\n",
      "Document 44 Step 27 processed (Perplexity: 10.52).\n",
      "Document 7 Step 27 processed (Perplexity: 12.76).\n",
      "\n",
      "\n",
      "Document 86 Step 28 processed (Perplexity: 11.30).\n",
      "Document 25 Step 28 processed (Perplexity: 9.95).\n",
      "Document 19 Step 28 processed (Perplexity: 9.49).\n",
      "Document 33 Step 28 processed (Perplexity: 11.44).\n",
      "Document 26 Step 28 processed (Perplexity: 19.16).\n",
      "Document 81 Step 28 processed (Perplexity: 19.72).\n",
      "Document 99 Step 28 processed (Perplexity: 9.99).\n",
      "Document 6 Step 28 processed (Perplexity: 15.37).\n",
      "Document 80 Step 28 processed (Perplexity: 11.10).\n",
      "Document 30 Step 28 processed (Perplexity: 7.86).\n",
      "\n",
      "\n",
      "Document 71 Step 29 processed (Perplexity: 9.90).\n",
      "Document 24 Step 29 processed (Perplexity: 11.24).\n",
      "Document 63 Step 29 processed (Perplexity: 12.40).\n",
      "Document 55 Step 29 processed (Perplexity: 12.72).\n",
      "Document 29 Step 29 processed (Perplexity: 9.83).\n",
      "Document 67 Step 29 processed (Perplexity: 10.52).\n",
      "Document 36 Step 29 processed (Perplexity: 10.01).\n",
      "Document 69 Step 29 processed (Perplexity: 11.87).\n",
      "Document 11 Step 29 processed (Perplexity: 17.88).\n",
      "Document 28 Step 29 processed (Perplexity: 5.88).\n",
      "\n",
      "\n",
      "Document 69 Step 30 processed (Perplexity: 8.13).\n",
      "Document 21 Step 30 processed (Perplexity: 13.88).\n",
      "Document 83 Step 30 processed (Perplexity: 3.77).\n",
      "Document 10 Step 30 processed (Perplexity: 9.48).\n",
      "Document 14 Step 30 processed (Perplexity: 16.74).\n",
      "Document 43 Step 30 processed (Perplexity: 11.00).\n",
      "Document 27 Step 30 processed (Perplexity: 14.81).\n",
      "Document 95 Step 30 processed (Perplexity: 10.44).\n",
      "Document 39 Step 30 processed (Perplexity: 8.00).\n",
      "Document 2 Step 30 processed (Perplexity: 24.86).\n",
      "\n",
      "\n",
      "Document 86 Step 31 processed (Perplexity: 9.35).\n",
      "Document 20 Step 31 processed (Perplexity: 9.02).\n",
      "Document 19 Step 31 processed (Perplexity: 9.57).\n",
      "Document 11 Step 31 processed (Perplexity: 15.45).\n",
      "Document 44 Step 31 processed (Perplexity: 10.34).\n",
      "Document 51 Step 31 processed (Perplexity: 6.10).\n",
      "Document 96 Step 31 processed (Perplexity: 12.18).\n",
      "Document 73 Step 31 processed (Perplexity: 14.50).\n",
      "Document 30 Step 31 processed (Perplexity: 7.02).\n",
      "Document 79 Step 31 processed (Perplexity: 8.25).\n",
      "Average loss at step 31: 2.406628\n",
      "\tPerplexity at step 31: 11.096480\n",
      "\n",
      "Valid Perplexity: 70.27\n",
      "\n",
      "Generated Text after epoch 30 ... \n",
      "======================== New text Segment ==========================\n",
      "\t nst the packs have, and the had her browh hat the little\n",
      "strangen out it comlideres,s hund little the little\n",
      "struch was little\n",
      "strangend was plast of stays had tone,                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 27 Step 32 processed (Perplexity: 13.45).\n",
      "Document 40 Step 32 processed (Perplexity: 14.21).\n",
      "Document 47 Step 32 processed (Perplexity: 9.62).\n",
      "Document 52 Step 32 processed (Perplexity: 8.12).\n",
      "Document 96 Step 32 processed (Perplexity: 10.08).\n",
      "Document 45 Step 32 processed (Perplexity: 13.94).\n",
      "Document 78 Step 32 processed (Perplexity: 13.22).\n",
      "Document 54 Step 32 processed (Perplexity: 11.24).\n",
      "Document 1 Step 32 processed (Perplexity: 14.31).\n",
      "Document 49 Step 32 processed (Perplexity: 11.97).\n",
      "\n",
      "\n",
      "Document 11 Step 33 processed (Perplexity: 14.80).\n",
      "Document 52 Step 33 processed (Perplexity: 6.30).\n",
      "Document 29 Step 33 processed (Perplexity: 9.97).\n",
      "Document 44 Step 33 processed (Perplexity: 9.74).\n",
      "Document 98 Step 33 processed (Perplexity: 16.14).\n",
      "Document 71 Step 33 processed (Perplexity: 8.55).\n",
      "Document 81 Step 33 processed (Perplexity: 19.76).\n",
      "Document 57 Step 33 processed (Perplexity: 11.74).\n",
      "Document 96 Step 33 processed (Perplexity: 9.18).\n",
      "Document 93 Step 33 processed (Perplexity: 9.13).\n",
      "\n",
      "\n",
      "Document 86 Step 34 processed (Perplexity: 9.72).\n",
      "Document 21 Step 34 processed (Perplexity: 14.52).\n",
      "Document 97 Step 34 processed (Perplexity: 13.03).\n",
      "Document 60 Step 34 processed (Perplexity: 19.79).\n",
      "Document 35 Step 34 processed (Perplexity: 14.99).\n",
      "Document 90 Step 34 processed (Perplexity: 12.50).\n",
      "Document 55 Step 34 processed (Perplexity: 12.09).\n",
      "Document 8 Step 34 processed (Perplexity: 11.84).\n",
      "Document 25 Step 34 processed (Perplexity: 9.39).\n",
      "Document 19 Step 34 processed (Perplexity: 9.36).\n",
      "\n",
      "\n",
      "Document 48 Step 35 processed (Perplexity: 10.15).\n",
      "Document 47 Step 35 processed (Perplexity: 8.17).\n",
      "Document 17 Step 35 processed (Perplexity: 11.79).\n",
      "Document 19 Step 35 processed (Perplexity: 6.79).\n",
      "Document 42 Step 35 processed (Perplexity: 15.08).\n",
      "Document 90 Step 35 processed (Perplexity: 10.66).\n",
      "Document 13 Step 35 processed (Perplexity: 12.40).\n",
      "Document 54 Step 35 processed (Perplexity: 10.30).\n",
      "Document 71 Step 35 processed (Perplexity: 8.70).\n",
      "Document 32 Step 35 processed (Perplexity: 12.23).\n",
      "\n",
      "\n",
      "Document 65 Step 36 processed (Perplexity: 14.05).\n",
      "Document 43 Step 36 processed (Perplexity: 10.67).\n",
      "Document 92 Step 36 processed (Perplexity: 9.29).\n",
      "Document 38 Step 36 processed (Perplexity: 10.61).\n",
      "Document 31 Step 36 processed (Perplexity: 6.57).\n",
      "Document 10 Step 36 processed (Perplexity: 9.16).\n",
      "Document 66 Step 36 processed (Perplexity: 16.93).\n",
      "Document 55 Step 36 processed (Perplexity: 11.47).\n",
      "Document 13 Step 36 processed (Perplexity: 10.99).\n",
      "Document 98 Step 36 processed (Perplexity: 14.18).\n",
      "Average loss at step 36: 2.424117\n",
      "\tPerplexity at step 36: 11.292259\n",
      "\n",
      "Valid Perplexity: 45.26\n",
      "\n",
      "Generated Text after epoch 35 ... \n",
      "======================== New text Segment ==========================\n",
      "\t the tree for all three-eyes, and stritched them to the king's sowards and that they were they they went to them the was become to her great tree stranged that she was two-eyes to then the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as that to the tree from the tree stranged the tree wome to seed the tree wanted to they were they that see-eyes, and two-eyes of strange with the tree ser it to man as \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 24 Step 37 processed (Perplexity: 10.22).\n",
      "Document 63 Step 37 processed (Perplexity: 11.95).\n",
      "Document 77 Step 37 processed (Perplexity: 4.60).\n",
      "Document 56 Step 37 processed (Perplexity: 8.30).\n",
      "Document 37 Step 37 processed (Perplexity: 13.49).\n",
      "Document 34 Step 37 processed (Perplexity: 12.33).\n",
      "Document 66 Step 37 processed (Perplexity: 14.36).\n",
      "Document 25 Step 37 processed (Perplexity: 9.35).\n",
      "Document 94 Step 37 processed (Perplexity: 14.52).\n",
      "Document 98 Step 37 processed (Perplexity: 12.29).\n",
      "\n",
      "\n",
      "Document 27 Step 38 processed (Perplexity: 13.53).\n",
      "Document 4 Step 38 processed (Perplexity: 16.88).\n",
      "Document 23 Step 38 processed (Perplexity: 8.09).\n",
      "Document 2 Step 38 processed (Perplexity: 20.90).\n",
      "Document 99 Step 38 processed (Perplexity: 9.79).\n",
      "Document 88 Step 38 processed (Perplexity: 15.05).\n",
      "Document 15 Step 38 processed (Perplexity: 14.28).\n",
      "Document 61 Step 38 processed (Perplexity: 16.88).\n",
      "Document 97 Step 38 processed (Perplexity: 13.31).\n",
      "Document 40 Step 38 processed (Perplexity: 14.26).\n",
      "\n",
      "\n",
      "Document 42 Step 39 processed (Perplexity: 14.93).\n",
      "Document 78 Step 39 processed (Perplexity: 12.52).\n",
      "Document 52 Step 39 processed (Perplexity: 8.09).\n",
      "Document 93 Step 39 processed (Perplexity: 8.47).\n",
      "Document 71 Step 39 processed (Perplexity: 8.85).\n",
      "Document 84 Step 39 processed (Perplexity: 12.44).\n",
      "Document 36 Step 39 processed (Perplexity: 9.45).\n",
      "Document 22 Step 39 processed (Perplexity: 13.38).\n",
      "Document 83 Step 39 processed (Perplexity: 4.58).\n",
      "Document 90 Step 39 processed (Perplexity: 12.67).\n",
      "\n",
      "\n",
      "Document 73 Step 40 processed (Perplexity: 14.07).\n",
      "Document 16 Step 40 processed (Perplexity: 10.45).\n",
      "Document 96 Step 40 processed (Perplexity: 11.16).\n",
      "Document 4 Step 40 processed (Perplexity: 14.59).\n",
      "Document 29 Step 40 processed (Perplexity: 10.34).\n",
      "Document 65 Step 40 processed (Perplexity: 12.58).\n",
      "Document 48 Step 40 processed (Perplexity: 9.08).\n",
      "Document 21 Step 40 processed (Perplexity: 13.78).\n",
      "Document 0 Step 40 processed (Perplexity: 11.12).\n",
      "Document 98 Step 40 processed (Perplexity: 12.66).\n",
      "\n",
      "\n",
      "Document 73 Step 41 processed (Perplexity: 11.91).\n",
      "Document 79 Step 41 processed (Perplexity: 7.89).\n",
      "Document 39 Step 41 processed (Perplexity: 8.56).\n",
      "Document 92 Step 41 processed (Perplexity: 8.95).\n",
      "Document 30 Step 41 processed (Perplexity: 7.52).\n",
      "Document 62 Step 41 processed (Perplexity: 9.48).\n",
      "Document 19 Step 41 processed (Perplexity: 10.70).\n",
      "Document 60 Step 41 processed (Perplexity: 20.74).\n",
      "Document 65 Step 41 processed (Perplexity: 11.75).\n",
      "Document 64 Step 41 processed (Perplexity: 14.01).\n",
      "Average loss at step 41: 2.427440\n",
      "\tPerplexity at step 41: 11.329837\n",
      "\n",
      "Valid Perplexity: 43.98\n",
      "\n",
      "Generated Text after epoch 40 ... \n",
      "======================== New text Segment ==========================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t d had that the sled in the child to the great the had the boel chise they they chang, and dride and that the sleven the great they seees, and there the bodded, and then were the body to you, said, now have the boad to me there, and the preat and will be the borm, and saw they was, and there the princed to heating, and drot the sled in the child, and there the boad to the great a shart to her, and the boad to the bored the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound the chiled that the boad to the great a shart to sleep as the had the bout if the bodded, and the boy breast of the bride had the bout if they will the bound \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 55 Step 42 processed (Perplexity: 12.00).\n",
      "Document 86 Step 42 processed (Perplexity: 10.24).\n",
      "Document 33 Step 42 processed (Perplexity: 13.85).\n",
      "Document 81 Step 42 processed (Perplexity: 19.34).\n",
      "Document 85 Step 42 processed (Perplexity: 12.19).\n",
      "Document 57 Step 42 processed (Perplexity: 11.79).\n",
      "Document 99 Step 42 processed (Perplexity: 7.86).\n",
      "Document 94 Step 42 processed (Perplexity: 12.88).\n",
      "Document 11 Step 42 processed (Perplexity: 16.67).\n",
      "Document 24 Step 42 processed (Perplexity: 10.12).\n",
      "\n",
      "\n",
      "Document 53 Step 43 processed (Perplexity: 8.29).\n",
      "Document 33 Step 43 processed (Perplexity: 11.79).\n",
      "Document 29 Step 43 processed (Perplexity: 10.05).\n",
      "Document 6 Step 43 processed (Perplexity: 13.51).\n",
      "Document 14 Step 43 processed (Perplexity: 16.66).\n",
      "Document 86 Step 43 processed (Perplexity: 8.68).\n",
      "Document 43 Step 43 processed (Perplexity: 10.36).\n",
      "Document 98 Step 43 processed (Perplexity: 12.81).\n",
      "Document 70 Step 43 processed (Perplexity: 10.07).\n",
      "Document 95 Step 43 processed (Perplexity: 10.33).\n",
      "\n",
      "\n",
      "Document 36 Step 44 processed (Perplexity: 8.90).\n",
      "Document 49 Step 44 processed (Perplexity: 11.67).\n",
      "Document 86 Step 44 processed (Perplexity: 7.27).\n",
      "Document 3 Step 44 processed (Perplexity: 11.07).\n",
      "Document 95 Step 44 processed (Perplexity: 8.27).\n",
      "Document 51 Step 44 processed (Perplexity: 6.16).\n",
      "Document 90 Step 44 processed (Perplexity: 12.80).\n",
      "Document 32 Step 44 processed (Perplexity: 12.48).\n",
      "Document 35 Step 44 processed (Perplexity: 15.95).\n",
      "Document 76 Step 44 processed (Perplexity: 11.79).\n",
      "\n",
      "\n",
      "Document 56 Step 45 processed (Perplexity: 7.57).\n",
      "Document 93 Step 45 processed (Perplexity: 9.43).\n",
      "Document 52 Step 45 processed (Perplexity: 8.35).\n",
      "Document 60 Step 45 processed (Perplexity: 20.11).\n",
      "Document 16 Step 45 processed (Perplexity: 10.29).\n",
      "Document 3 Step 45 processed (Perplexity: 9.35).\n",
      "Document 32 Step 45 processed (Perplexity: 9.82).\n",
      "Document 73 Step 45 processed (Perplexity: 13.72).\n",
      "Document 62 Step 45 processed (Perplexity: 8.26).\n",
      "Document 15 Step 45 processed (Perplexity: 14.99).\n",
      "\n",
      "\n",
      "Document 44 Step 46 processed (Perplexity: 11.39).\n",
      "Document 74 Step 46 processed (Perplexity: 11.14).\n",
      "Document 57 Step 46 processed (Perplexity: 11.47).\n",
      "Document 76 Step 46 processed (Perplexity: 9.86).\n",
      "Document 84 Step 46 processed (Perplexity: 12.18).\n",
      "Document 95 Step 46 processed (Perplexity: 9.27).\n",
      "Document 63 Step 46 processed (Perplexity: 12.86).\n",
      "Document 14 Step 46 processed (Perplexity: 15.63).\n",
      "Document 49 Step 46 processed (Perplexity: 9.98).\n",
      "Document 4 Step 46 processed (Perplexity: 15.15).\n",
      "Average loss at step 46: 2.413755\n",
      "\tPerplexity at step 46: 11.175849\n",
      "\n",
      "Valid Perplexity: 38.73\n",
      "\n",
      "Generated Text after epoch 45 ... \n",
      "======================== New text Segment ==========================\n",
      "\t ce said the king main father, and the king's daughter\n",
      "to the should the kingelled it the king he had go that, and he was the tilt his that that the find said me, they said the kingeld the find jumped out of the hords saw that, and the king's son he had done to the king the called the king's soll in the king and began them the king said agains, and he was the hund me, and then the king he had been the king, and he was the heard the king's soll tailon and said to the fathing the king, and he was the hearten had said that the hundrl tove to the king and be the king, and he was he said, when the king he had go that, and he was the tilt his that that the find said me, they said the kingeld the find jumped out of the hords saw that, and the king's son he had done to the king the called the king's soll in the king and began them the king said agains, and he was the hund me, and then the king he had been the king, and he was the heard the king's soll tailon and said to the fathing the king, and he was the hearten had said that the hundrl tove to the king and be the king, and he was he said, when the king he had go that, and he was the tilt his that that the find said me, they said the kingeld the find jumped out of the hords saw that, and the king's son he had done to the king the called the king's soll in the king and began them the king said agains, and he was the hund me, and then the king he had been the king, and he was the heard the king's soll tailon and said to the fathing the king, and he was the hearten had said that the hundrl tove to the king and be the king, and he was he said, when the king he had go that, and he was the tilt his that that the find said me, they said the kingeld the find jumped out of the hords saw that, and the king's son he had done to the king the called the king's soll in the king and began them the king said agains, and he was the hund me, and then the king he had been the king, and he was the heard the king's soll tailon and said to the fa\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 39 Step 47 processed (Perplexity: 7.98).\n",
      "Document 52 Step 47 processed (Perplexity: 7.37).\n",
      "Document 34 Step 47 processed (Perplexity: 12.30).\n",
      "Document 48 Step 47 processed (Perplexity: 9.78).\n",
      "Document 40 Step 47 processed (Perplexity: 13.63).\n",
      "Document 25 Step 47 processed (Perplexity: 9.54).\n",
      "Document 0 Step 47 processed (Perplexity: 11.21).\n",
      "Document 9 Step 47 processed (Perplexity: 13.17).\n",
      "Document 12 Step 47 processed (Perplexity: 11.24).\n",
      "Document 90 Step 47 processed (Perplexity: 11.49).\n",
      "\n",
      "\n",
      "Document 26 Step 48 processed (Perplexity: 18.78).\n",
      "Document 92 Step 48 processed (Perplexity: 8.48).\n",
      "Document 17 Step 48 processed (Perplexity: 11.38).\n",
      "Document 74 Step 48 processed (Perplexity: 10.13).\n",
      "Document 88 Step 48 processed (Perplexity: 12.99).\n",
      "Document 13 Step 48 processed (Perplexity: 12.16).\n",
      "Document 94 Step 48 processed (Perplexity: 11.10).\n",
      "Document 35 Step 48 processed (Perplexity: 14.23).\n",
      "Document 53 Step 48 processed (Perplexity: 7.69).\n",
      "Document 6 Step 48 processed (Perplexity: 13.24).\n",
      "\n",
      "\n",
      "Document 15 Step 49 processed (Perplexity: 12.69).\n",
      "Document 40 Step 49 processed (Perplexity: 12.12).\n",
      "Document 84 Step 49 processed (Perplexity: 11.36).\n",
      "Document 24 Step 49 processed (Perplexity: 10.13).\n",
      "Document 97 Step 49 processed (Perplexity: 12.89).\n",
      "Document 82 Step 49 processed (Perplexity: 12.36).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 86 Step 49 processed (Perplexity: 8.99).\n",
      "Document 95 Step 49 processed (Perplexity: 9.49).\n",
      "Document 42 Step 49 processed (Perplexity: 15.10).\n",
      "Document 85 Step 49 processed (Perplexity: 11.44).\n",
      "\n",
      "\n",
      "Document 85 Step 50 processed (Perplexity: 6.96).\n",
      "Document 58 Step 50 processed (Perplexity: 4.98).\n",
      "Document 70 Step 50 processed (Perplexity: 10.71).\n",
      "Document 41 Step 50 processed (Perplexity: 15.18).\n",
      "Document 82 Step 50 processed (Perplexity: 10.05).\n",
      "Document 98 Step 50 processed (Perplexity: 13.02).\n",
      "Document 39 Step 50 processed (Perplexity: 7.26).\n",
      "Document 33 Step 50 processed (Perplexity: 13.18).\n",
      "Document 44 Step 50 processed (Perplexity: 11.03).\n",
      "Document 71 Step 50 processed (Perplexity: 8.39).\n",
      "\n",
      "\n",
      "Document 62 Step 51 processed (Perplexity: 8.12).\n",
      "Document 33 Step 51 processed (Perplexity: 9.92).\n",
      "Document 92 Step 51 processed (Perplexity: 7.98).\n",
      "Document 71 Step 51 processed (Perplexity: 6.14).\n",
      "Document 67 Step 51 processed (Perplexity: 10.14).\n",
      "Document 23 Step 51 processed (Perplexity: 8.37).\n",
      "Document 74 Step 51 processed (Perplexity: 10.23).\n",
      "Document 89 Step 51 processed (Perplexity: 3.83).\n",
      "Document 70 Step 51 processed (Perplexity: 10.06).\n",
      "Document 60 Step 51 processed (Perplexity: 19.46).\n",
      "Average loss at step 51: 2.337437\n",
      "\tPerplexity at step 51: 10.354668\n",
      "\n",
      "Valid Perplexity: 36.62\n",
      "\n",
      "Generated Text after epoch 50 ... \n",
      "======================== New text Segment ==========================\n",
      "\t tlied sat ofm with me the bride it, and and they had.  then the rought take her had the inught took the caster.\n",
      "\n",
      "then he looked to to the brit, then he had the bridegy the knapped out of the over was there and with, and said, and her bring, but hown had ware, and was you wate deaing, and told with to the eald by the bride it was oper no hadren was the chand, and some had to not a knover walled he said, and was the brother, i with the bridtly to the seet not was brother, and to water and the that, and her with the old king said, and said, and was beside him, you the was the with to the death the bride he was not, and was there, and in the brit, and stone was brother heress, the dead, and was brother, and thind the king's daughter, and was there, and in the knapsack the pried, and the to her, you had was brother, it a know he and the bridegrobler they was the innds king musicm.  dom was nother the every letter to take the with, and said, and was beside him, you the was the with to the death the bride he was not, and was there, and in the brit, and stone was brother heress, the dead, and was brother, and thind the king's daughter, and was there, and in the knapsack the pried, and the to her, you had was brother, it a know he and the bridegrobler they was the innds king musicm.  dom was nother the every letter to take the with, and said, and was beside him, you the was the with to the death the bride he was not, and was there, and in the brit, and stone was brother heress, the dead, and was brother, and thind the king's daughter, and was there, and in the knapsack the pried, and the to her, you had was brother, it a know he and the bridegrobler they was the innds king musicm.  dom was nother the every letter to take the with, and said, and was beside him, you the was the with to the death the bride he was not, and was there, and in the brit, and stone was brother heress, the dead, and was brother, and thind the king's daughter, and was there, and in the knapsack the pried\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 41 Step 52 processed (Perplexity: 13.65).\n",
      "Document 62 Step 52 processed (Perplexity: 6.26).\n",
      "Document 91 Step 52 processed (Perplexity: 17.28).\n",
      "Document 56 Step 52 processed (Perplexity: 7.47).\n",
      "Document 34 Step 52 processed (Perplexity: 12.34).\n",
      "Document 82 Step 52 processed (Perplexity: 11.04).\n",
      "Document 58 Step 52 processed (Perplexity: 4.56).\n",
      "Document 14 Step 52 processed (Perplexity: 18.08).\n",
      "Document 72 Step 52 processed (Perplexity: 12.92).\n",
      "Document 35 Step 52 processed (Perplexity: 14.68).\n",
      "\n",
      "\n",
      "Document 98 Step 53 processed (Perplexity: 12.33).\n",
      "Document 48 Step 53 processed (Perplexity: 9.73).\n",
      "Document 43 Step 53 processed (Perplexity: 11.23).\n",
      "Document 72 Step 53 processed (Perplexity: 10.72).\n",
      "Document 84 Step 53 processed (Perplexity: 11.13).\n",
      "Document 6 Step 53 processed (Perplexity: 12.13).\n",
      "Document 53 Step 53 processed (Perplexity: 8.07).\n",
      "Document 13 Step 53 processed (Perplexity: 12.14).\n",
      "Document 55 Step 53 processed (Perplexity: 12.70).\n",
      "Document 91 Step 53 processed (Perplexity: 14.52).\n",
      "\n",
      "\n",
      "Document 49 Step 54 processed (Perplexity: 10.90).\n",
      "Document 14 Step 54 processed (Perplexity: 14.84).\n",
      "Document 93 Step 54 processed (Perplexity: 8.84).\n",
      "Document 58 Step 54 processed (Perplexity: 4.13).\n",
      "Document 99 Step 54 processed (Perplexity: 10.16).\n",
      "Document 72 Step 54 processed (Perplexity: 12.39).\n",
      "Document 0 Step 54 processed (Perplexity: 11.45).\n",
      "Document 29 Step 54 processed (Perplexity: 11.52).\n",
      "Document 66 Step 54 processed (Perplexity: 15.57).\n",
      "Document 97 Step 54 processed (Perplexity: 12.47).\n",
      "\n",
      "\n",
      "Document 12 Step 55 processed (Perplexity: 11.02).\n",
      "Document 32 Step 55 processed (Perplexity: 11.77).\n",
      "Document 90 Step 55 processed (Perplexity: 12.14).\n",
      "Document 65 Step 55 processed (Perplexity: 13.51).\n",
      "Document 63 Step 55 processed (Perplexity: 12.08).\n",
      "Document 91 Step 55 processed (Perplexity: 13.65).\n",
      "Document 16 Step 55 processed (Perplexity: 10.26).\n",
      "Document 97 Step 55 processed (Perplexity: 10.38).\n",
      "Document 48 Step 55 processed (Perplexity: 8.49).\n",
      "Document 78 Step 55 processed (Perplexity: 12.78).\n",
      "\n",
      "\n",
      "Document 81 Step 56 processed (Perplexity: 19.42).\n",
      "Document 0 Step 56 processed (Perplexity: 9.65).\n",
      "Document 8 Step 56 processed (Perplexity: 13.07).\n",
      "Document 73 Step 56 processed (Perplexity: 13.44).\n",
      "Document 42 Step 56 processed (Perplexity: 14.58).\n",
      "Document 32 Step 56 processed (Perplexity: 9.81).\n",
      "Document 58 Step 56 processed (Perplexity: 4.02).\n",
      "Document 3 Step 56 processed (Perplexity: 12.23).\n",
      "Document 43 Step 56 processed (Perplexity: 10.56).\n",
      "Document 76 Step 56 processed (Perplexity: 11.45).\n",
      "Average loss at step 56: 2.406352\n",
      "\tPerplexity at step 56: 11.093421\n",
      "\n",
      "Valid Perplexity: 47.20\n",
      "\n",
      "Generated Text after epoch 55 ... \n",
      "======================== New text Segment ==========================\n",
      "\t d had have one to the king andoos bele the bounty the could led that the fox, and the children, and the could not and the bearl the that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n",
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began that he was when the king's sone\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "not the king's daughter the children, and said, the children, and the child the mill, and the childroging to the was to the king's came to the was the beautife that the fox, and the king and begand to the their in, and began\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 14 Step 57 processed (Perplexity: 14.97).\n",
      "Document 96 Step 57 processed (Perplexity: 12.15).\n",
      "Document 33 Step 57 processed (Perplexity: 12.53).\n",
      "Document 22 Step 57 processed (Perplexity: 12.50).\n",
      "Document 65 Step 57 processed (Perplexity: 12.12).\n",
      "Document 58 Step 57 processed (Perplexity: 3.49).\n",
      "Document 97 Step 57 processed (Perplexity: 11.51).\n",
      "Document 49 Step 57 processed (Perplexity: 10.14).\n",
      "Document 48 Step 57 processed (Perplexity: 8.11).\n",
      "Document 11 Step 57 processed (Perplexity: 16.16).\n",
      "\n",
      "\n",
      "Document 87 Step 58 processed (Perplexity: 7.76).\n",
      "Document 82 Step 58 processed (Perplexity: 11.98).\n",
      "Document 47 Step 58 processed (Perplexity: 8.86).\n",
      "Document 69 Step 58 processed (Perplexity: 12.23).\n",
      "Document 48 Step 58 processed (Perplexity: 6.03).\n",
      "Document 44 Step 58 processed (Perplexity: 11.14).\n",
      "Document 58 Step 58 processed (Perplexity: 3.26).\n",
      "Document 75 Step 58 processed (Perplexity: 14.44).\n",
      "Document 49 Step 58 processed (Perplexity: 9.01).\n",
      "Document 91 Step 58 processed (Perplexity: 14.74).\n",
      "\n",
      "\n",
      "Document 8 Step 59 processed (Perplexity: 11.21).\n",
      "Document 37 Step 59 processed (Perplexity: 12.26).\n",
      "Document 39 Step 59 processed (Perplexity: 7.95).\n",
      "Document 71 Step 59 processed (Perplexity: 8.53).\n",
      "Document 41 Step 59 processed (Perplexity: 14.45).\n",
      "Document 52 Step 59 processed (Perplexity: 7.91).\n",
      "Document 57 Step 59 processed (Perplexity: 11.55).\n",
      "Document 84 Step 59 processed (Perplexity: 10.85).\n",
      "Document 44 Step 59 processed (Perplexity: 9.36).\n",
      "Document 92 Step 59 processed (Perplexity: 8.27).\n",
      "\n",
      "\n",
      "Document 38 Step 60 processed (Perplexity: 11.09).\n",
      "Document 70 Step 60 processed (Perplexity: 10.13).\n",
      "Document 26 Step 60 processed (Perplexity: 19.29).\n",
      "Document 83 Step 60 processed (Perplexity: 5.28).\n",
      "Document 81 Step 60 processed (Perplexity: 20.70).\n",
      "Document 11 Step 60 processed (Perplexity: 15.05).\n",
      "Document 96 Step 60 processed (Perplexity: 10.73).\n",
      "Document 84 Step 60 processed (Perplexity: 9.47).\n",
      "Document 66 Step 60 processed (Perplexity: 14.38).\n",
      "Document 40 Step 60 processed (Perplexity: 12.77).\n",
      "\n",
      "\n",
      "Document 79 Step 61 processed (Perplexity: 8.87).\n",
      "Document 19 Step 61 processed (Perplexity: 11.28).\n",
      "Document 53 Step 61 processed (Perplexity: 7.99).\n",
      "Document 47 Step 61 processed (Perplexity: 8.72).\n",
      "Document 16 Step 61 processed (Perplexity: 9.90).\n",
      "Document 96 Step 61 processed (Perplexity: 9.90).\n",
      "Document 41 Step 61 processed (Perplexity: 12.84).\n",
      "Document 83 Step 61 processed (Perplexity: 4.03).\n",
      "Document 7 Step 61 processed (Perplexity: 13.04).\n",
      "Document 10 Step 61 processed (Perplexity: 9.49).\n",
      "Average loss at step 61: 2.319665\n",
      "\tPerplexity at step 61: 10.172268\n",
      "\n",
      "Valid Perplexity: 50.71\n",
      "\n",
      "Generated Text after epoch 60 ... \n",
      "======================== New text Segment ==========================\n",
      "\t  and said, but she begbl the girlow you that to the king and when\n",
      "the little by the your thread, she thread returned, and the you have by could not the pime to the king and said the you come, and the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the the bridegroom the third and the beautiful brold night and the three soon the king's sone\n",
      "the \n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 51 Step 62 processed (Perplexity: 6.67).\n",
      "Document 41 Step 62 processed (Perplexity: 11.77).\n",
      "Document 46 Step 62 processed (Perplexity: 19.99).\n",
      "Document 80 Step 62 processed (Perplexity: 10.31).\n",
      "Document 81 Step 62 processed (Perplexity: 18.05).\n",
      "Document 20 Step 62 processed (Perplexity: 9.10).\n",
      "Document 40 Step 62 processed (Perplexity: 12.15).\n",
      "Document 93 Step 62 processed (Perplexity: 8.57).\n",
      "Document 26 Step 62 processed (Perplexity: 18.67).\n",
      "Document 38 Step 62 processed (Perplexity: 10.01).\n",
      "\n",
      "\n",
      "Document 27 Step 63 processed (Perplexity: 14.10).\n",
      "Document 92 Step 63 processed (Perplexity: 8.12).\n",
      "Document 72 Step 63 processed (Perplexity: 12.38).\n",
      "Document 33 Step 63 processed (Perplexity: 11.77).\n",
      "Document 56 Step 63 processed (Perplexity: 7.57).\n",
      "Document 26 Step 63 processed (Perplexity: 16.49).\n",
      "Document 58 Step 63 processed (Perplexity: 4.09).\n",
      "Document 93 Step 63 processed (Perplexity: 7.26).\n",
      "Document 83 Step 63 processed (Perplexity: 4.17).\n",
      "Document 41 Step 63 processed (Perplexity: 13.49).\n",
      "\n",
      "\n",
      "Document 31 Step 64 processed (Perplexity: 7.12).\n",
      "Document 9 Step 64 processed (Perplexity: 14.19).\n",
      "Document 77 Step 64 processed (Perplexity: 4.89).\n",
      "Document 55 Step 64 processed (Perplexity: 14.43).\n",
      "Document 0 Step 64 processed (Perplexity: 11.16).\n",
      "Document 69 Step 64 processed (Perplexity: 11.41).\n",
      "Document 95 Step 64 processed (Perplexity: 10.85).\n",
      "Document 94 Step 64 processed (Perplexity: 12.03).\n",
      "Document 64 Step 64 processed (Perplexity: 13.75).\n",
      "Document 40 Step 64 processed (Perplexity: 12.15).\n",
      "\n",
      "\n",
      "Document 59 Step 65 processed (Perplexity: 6.74).\n",
      "Document 92 Step 65 processed (Perplexity: 8.09).\n",
      "Document 42 Step 65 processed (Perplexity: 16.67).\n",
      "Document 86 Step 65 processed (Perplexity: 10.15).\n",
      "Document 56 Step 65 processed (Perplexity: 6.49).\n",
      "Document 4 Step 65 processed (Perplexity: 17.56).\n",
      "Document 28 Step 65 processed (Perplexity: 8.17).\n",
      "Document 52 Step 65 processed (Perplexity: 8.38).\n",
      "Document 58 Step 65 processed (Perplexity: 3.92).\n",
      "Document 61 Step 65 processed (Perplexity: 18.58).\n",
      "\n",
      "\n",
      "Document 77 Step 66 processed (Perplexity: 4.51).\n",
      "Document 50 Step 66 processed (Perplexity: 14.32).\n",
      "Document 55 Step 66 processed (Perplexity: 12.91).\n",
      "Document 48 Step 66 processed (Perplexity: 9.14).\n",
      "Document 65 Step 66 processed (Perplexity: 13.32).\n",
      "Document 27 Step 66 processed (Perplexity: 13.77).\n",
      "Document 87 Step 66 processed (Perplexity: 7.79).\n",
      "Document 73 Step 66 processed (Perplexity: 15.01).\n",
      "Document 62 Step 66 processed (Perplexity: 9.19).\n",
      "Document 18 Step 66 processed (Perplexity: 10.12).\n",
      "Average loss at step 66: 2.323407\n",
      "\tPerplexity at step 66: 10.210403\n",
      "\n",
      "Valid Perplexity: 54.55\n",
      "\n",
      "Generated Text after epoch 65 ... \n",
      "======================== New text Segment ==========================\n",
      "\t then said, beh, as were to he had the hoy themserved the sun't look into the whole said then they wanting then the sun son sit the whose was should not are and the bring are of the sund one was the mofers, and wadded and to the glass, and with the glass, and they went he can the glasless, and was inwn the rrosas of the wost was were to the cour, then they came he had to stop, the reach, not was at tomen a little mother, and dring in the sune\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the whothis she with the glasters, and dring in the sune\n",
      "went, and was the glas and they went his good, and a little have the royal splisent into the who\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 62 Step 67 processed (Perplexity: 4.51).\n",
      "Document 14 Step 67 processed (Perplexity: 16.93).\n",
      "Document 79 Step 67 processed (Perplexity: 8.21).\n",
      "Document 96 Step 67 processed (Perplexity: 12.02).\n",
      "Document 63 Step 67 processed (Perplexity: 12.77).\n",
      "Document 71 Step 67 processed (Perplexity: 8.70).\n",
      "Document 54 Step 67 processed (Perplexity: 10.97).\n",
      "Document 52 Step 67 processed (Perplexity: 7.52).\n",
      "Document 75 Step 67 processed (Perplexity: 14.12).\n",
      "Document 93 Step 67 processed (Perplexity: 7.95).\n",
      "\n",
      "\n",
      "Document 47 Step 68 processed (Perplexity: 8.64).\n",
      "Document 80 Step 68 processed (Perplexity: 9.85).\n",
      "Document 84 Step 68 processed (Perplexity: 10.98).\n",
      "Document 88 Step 68 processed (Perplexity: 13.21).\n",
      "Document 52 Step 68 processed (Perplexity: 6.24).\n",
      "Document 1 Step 68 processed (Perplexity: 14.72).\n",
      "Document 11 Step 68 processed (Perplexity: 16.15).\n",
      "Document 32 Step 68 processed (Perplexity: 12.19).\n",
      "Document 15 Step 68 processed (Perplexity: 13.96).\n",
      "Document 77 Step 68 processed (Perplexity: 4.16).\n",
      "\n",
      "\n",
      "Document 60 Step 69 processed (Perplexity: 21.65).\n",
      "Document 44 Step 69 processed (Perplexity: 10.70).\n",
      "Document 1 Step 69 processed (Perplexity: 11.46).\n",
      "Document 87 Step 69 processed (Perplexity: 7.21).\n",
      "Document 29 Step 69 processed (Perplexity: 12.19).\n",
      "Document 2 Step 69 processed (Perplexity: 21.05).\n",
      "Document 32 Step 69 processed (Perplexity: 9.59).\n",
      "Document 3 Step 69 processed (Perplexity: 11.61).\n",
      "Document 20 Step 69 processed (Perplexity: 9.21).\n",
      "Document 94 Step 69 processed (Perplexity: 13.14).\n",
      "\n",
      "\n",
      "Document 54 Step 70 processed (Perplexity: 10.38).\n",
      "Document 42 Step 70 processed (Perplexity: 15.78).\n",
      "Document 36 Step 70 processed (Perplexity: 10.75).\n",
      "Document 65 Step 70 processed (Perplexity: 13.53).\n",
      "Document 75 Step 70 processed (Perplexity: 12.72).\n",
      "Document 58 Step 70 processed (Perplexity: 4.33).\n",
      "Document 34 Step 70 processed (Perplexity: 13.15).\n",
      "Document 82 Step 70 processed (Perplexity: 12.60).\n",
      "Document 33 Step 70 processed (Perplexity: 12.53).\n",
      "Document 37 Step 70 processed (Perplexity: 12.83).\n",
      "\n",
      "\n",
      "Document 50 Step 71 processed (Perplexity: 12.33).\n",
      "Document 51 Step 71 processed (Perplexity: 6.83).\n",
      "Document 73 Step 71 processed (Perplexity: 14.91).\n",
      "Document 37 Step 71 processed (Perplexity: 9.66).\n",
      "Document 99 Step 71 processed (Perplexity: 11.11).\n",
      "Document 64 Step 71 processed (Perplexity: 14.96).\n",
      "Document 68 Step 71 processed (Perplexity: 14.12).\n",
      "Document 18 Step 71 processed (Perplexity: 8.91).\n",
      "Document 97 Step 71 processed (Perplexity: 12.75).\n",
      "Document 87 Step 71 processed (Perplexity: 6.88).\n",
      "Average loss at step 71: 2.381555\n",
      "\tPerplexity at step 71: 10.821713\n",
      "\n",
      "Valid Perplexity: 48.97\n",
      "\n",
      "Generated Text after epoch 70 ... \n",
      "======================== New text Segment ==========================\n",
      "\t  but he had brought to that that he had to see her brin was to he was to he went, and the bride to had nothing, and the little hut the went to the bright sun was to little little must was brought to thinks, is he whole her and it to little light to lost the little dread are the brike of the little house, and was to beautifuld to his no one to her, and was to he was to said the king of them, and was to hounds was to beave after the bright to his little little much the little houth took the tooking to he was was the bride to her not to to the taid to the wed the humve home was to beave his wank and said, and thenk to the this such as the little much had to had not the bring the griven told her, and was to dressed to the little mounto her, and said, and when he world this, the bright breed to her, and was to sought the bride wall, and that in the little much her, and he had brand was them, and was to taid to his no\n",
      "one and was to dead to hom the whole this browd to her, and was to he was to said the king of them, and was to hounds was to beave after the bright to his little little much the little houth took the tooking to he was was the bride to her not to to the taid to the wed the humve home was to beave his wank and said, and thenk to the this such as the little much had to had not the bring the griven told her, and was to dressed to the little mounto her, and said, and when he world this, the bright breed to her, and was to sought the bride wall, and that in the little much her, and he had brand was them, and was to taid to his no\n",
      "one and was to dead to hom the whole this browd to her, and was to he was to said the king of them, and was to hounds was to beave after the bright to his little little much the little houth took the tooking to he was was the bride to her not to to the taid to the wed the humve home was to beave his wank and said, and thenk to the this such as the little much had to had not the bring the griven told her, and was to dressed to the little mou\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 12 Step 72 processed (Perplexity: 12.05).\n",
      "Document 78 Step 72 processed (Perplexity: 13.71).\n",
      "Document 95 Step 72 processed (Perplexity: 10.98).\n",
      "Document 51 Step 72 processed (Perplexity: 5.11).\n",
      "Document 93 Step 72 processed (Perplexity: 8.64).\n",
      "Document 18 Step 72 processed (Perplexity: 7.63).\n",
      "Document 27 Step 72 processed (Perplexity: 14.52).\n",
      "Document 2 Step 72 processed (Perplexity: 19.29).\n",
      "Document 16 Step 72 processed (Perplexity: 10.23).\n",
      "Document 33 Step 72 processed (Perplexity: 11.71).\n",
      "\n",
      "\n",
      "Document 92 Step 73 processed (Perplexity: 8.54).\n",
      "Document 59 Step 73 processed (Perplexity: 6.92).\n",
      "Document 84 Step 73 processed (Perplexity: 12.08).\n",
      "Document 94 Step 73 processed (Perplexity: 10.84).\n",
      "Document 74 Step 73 processed (Perplexity: 11.20).\n",
      "Document 75 Step 73 processed (Perplexity: 12.67).\n",
      "Document 72 Step 73 processed (Perplexity: 12.11).\n",
      "Document 55 Step 73 processed (Perplexity: 12.97).\n",
      "Document 93 Step 73 processed (Perplexity: 6.99).\n",
      "Document 68 Step 73 processed (Perplexity: 12.58).\n",
      "\n",
      "\n",
      "Document 52 Step 74 processed (Perplexity: 7.34).\n",
      "Document 8 Step 74 processed (Perplexity: 12.23).\n",
      "Document 57 Step 74 processed (Perplexity: 11.81).\n",
      "Document 88 Step 74 processed (Perplexity: 13.30).\n",
      "Document 39 Step 74 processed (Perplexity: 8.66).\n",
      "Document 69 Step 74 processed (Perplexity: 11.63).\n",
      "Document 71 Step 74 processed (Perplexity: 8.58).\n",
      "Document 9 Step 74 processed (Perplexity: 13.42).\n",
      "Document 90 Step 74 processed (Perplexity: 12.72).\n",
      "Document 77 Step 74 processed (Perplexity: 4.71).\n",
      "\n",
      "\n",
      "Document 15 Step 75 processed (Perplexity: 13.92).\n",
      "Document 2 Step 75 processed (Perplexity: 18.27).\n",
      "Document 78 Step 75 processed (Perplexity: 11.85).\n",
      "Document 99 Step 75 processed (Perplexity: 8.81).\n",
      "Document 44 Step 75 processed (Perplexity: 12.74).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 8 Step 75 processed (Perplexity: 11.29).\n",
      "Document 80 Step 75 processed (Perplexity: 10.26).\n",
      "Document 31 Step 75 processed (Perplexity: 6.80).\n",
      "Document 45 Step 75 processed (Perplexity: 14.58).\n",
      "Document 52 Step 75 processed (Perplexity: 6.94).\n",
      "\n",
      "\n",
      "Document 71 Step 76 processed (Perplexity: 8.11).\n",
      "Document 39 Step 76 processed (Perplexity: 7.27).\n",
      "Document 35 Step 76 processed (Perplexity: 16.17).\n",
      "Document 17 Step 76 processed (Perplexity: 11.46).\n",
      "Document 50 Step 76 processed (Perplexity: 12.46).\n",
      "Document 36 Step 76 processed (Perplexity: 10.04).\n",
      "Document 99 Step 76 processed (Perplexity: 5.60).\n",
      "Document 27 Step 76 processed (Perplexity: 15.45).\n",
      "Document 78 Step 76 processed (Perplexity: 11.63).\n",
      "Document 88 Step 76 processed (Perplexity: 12.21).\n",
      "Average loss at step 76: 2.355050\n",
      "\tPerplexity at step 76: 10.538660\n",
      "\n",
      "Valid Perplexity: 57.05\n",
      "\n",
      "Generated Text after epoch 75 ... \n",
      "======================== New text Segment ==========================\n",
      "\t m and was so made the king's daughter was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the king, and what as she was was wanted to the something the king, and what it was that the peasant-might, and what as he, and was donnikowe the ki\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 96 Step 77 processed (Perplexity: 11.46).\n",
      "Document 57 Step 77 processed (Perplexity: 11.24).\n",
      "Document 72 Step 77 processed (Perplexity: 11.59).\n",
      "Document 97 Step 77 processed (Perplexity: 12.23).\n",
      "Document 47 Step 77 processed (Perplexity: 9.00).\n",
      "Document 37 Step 77 processed (Perplexity: 12.16).\n",
      "Document 76 Step 77 processed (Perplexity: 11.46).\n",
      "Document 17 Step 77 processed (Perplexity: 10.10).\n",
      "Document 83 Step 77 processed (Perplexity: 5.00).\n",
      "Document 88 Step 77 processed (Perplexity: 11.41).\n",
      "\n",
      "\n",
      "Document 22 Step 78 processed (Perplexity: 12.73).\n",
      "Document 25 Step 78 processed (Perplexity: 11.94).\n",
      "Document 0 Step 78 processed (Perplexity: 11.15).\n",
      "Document 62 Step 78 processed (Perplexity: 9.05).\n",
      "Document 6 Step 78 processed (Perplexity: 13.90).\n",
      "Document 57 Step 78 processed (Perplexity: 10.35).\n",
      "Document 61 Step 78 processed (Perplexity: 17.19).\n",
      "Document 20 Step 78 processed (Perplexity: 9.25).\n",
      "Document 27 Step 78 processed (Perplexity: 12.82).\n",
      "Document 3 Step 78 processed (Perplexity: 10.53).\n",
      "\n",
      "\n",
      "Document 35 Step 79 processed (Perplexity: 14.26).\n",
      "Document 23 Step 79 processed (Perplexity: 9.40).\n",
      "Document 66 Step 79 processed (Perplexity: 16.21).\n",
      "Document 9 Step 79 processed (Perplexity: 13.43).\n",
      "Document 13 Step 79 processed (Perplexity: 12.87).\n",
      "Document 37 Step 79 processed (Perplexity: 11.12).\n",
      "Document 36 Step 79 processed (Perplexity: 9.30).\n",
      "Document 15 Step 79 processed (Perplexity: 13.01).\n",
      "Document 22 Step 79 processed (Perplexity: 11.91).\n",
      "Document 85 Step 79 processed (Perplexity: 11.64).\n",
      "\n",
      "\n",
      "Document 58 Step 80 processed (Perplexity: 4.75).\n",
      "Document 49 Step 80 processed (Perplexity: 12.04).\n",
      "Document 11 Step 80 processed (Perplexity: 15.81).\n",
      "Document 62 Step 80 processed (Perplexity: 7.76).\n",
      "Document 75 Step 80 processed (Perplexity: 13.77).\n",
      "Document 72 Step 80 processed (Perplexity: 12.21).\n",
      "Document 39 Step 80 processed (Perplexity: 7.44).\n",
      "Document 74 Step 80 processed (Perplexity: 10.93).\n",
      "Document 98 Step 80 processed (Perplexity: 15.50).\n",
      "Document 66 Step 80 processed (Perplexity: 14.20).\n",
      "\n",
      "\n",
      "Document 65 Step 81 processed (Perplexity: 12.83).\n",
      "Document 92 Step 81 processed (Perplexity: 8.35).\n",
      "Document 51 Step 81 processed (Perplexity: 6.20).\n",
      "Document 86 Step 81 processed (Perplexity: 11.52).\n",
      "Document 44 Step 81 processed (Perplexity: 11.21).\n",
      "Document 25 Step 81 processed (Perplexity: 10.26).\n",
      "Document 20 Step 81 processed (Perplexity: 8.94).\n",
      "Document 64 Step 81 processed (Perplexity: 13.51).\n",
      "Document 33 Step 81 processed (Perplexity: 12.63).\n",
      "Document 73 Step 81 processed (Perplexity: 13.96).\n",
      "Average loss at step 81: 2.405723\n",
      "\tPerplexity at step 81: 11.086445\n",
      "\n",
      "Valid Perplexity: 47.03\n",
      "\n",
      "Generated Text after epoch 80 ... \n",
      "======================== New text Segment ==========================\n",
      "\t ut not, answered that the mother forest have to fall.\"  the mothed the forest, \"i will be and have be have have the mothed the forest, \"i have all with the most once had father have not and i will said, \"i have have fasted the boy, and the mother and said, \"you will site had have you was the father had the botted the father, he had told his might, i have have fasted to you will let the father, said the mother as the space, and there was notone of in the mond, and has happer.\"    \"good the morning that the beautiful morning said, \"i will be and had not had got the tastled the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, and have his mave the father, and the boy have so never the got went the fores was this father, an\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 90 Step 82 processed (Perplexity: 11.98).\n",
      "Document 1 Step 82 processed (Perplexity: 14.19).\n",
      "Document 64 Step 82 processed (Perplexity: 10.56).\n",
      "Document 17 Step 82 processed (Perplexity: 10.32).\n",
      "Document 86 Step 82 processed (Perplexity: 9.17).\n",
      "Document 31 Step 82 processed (Perplexity: 6.13).\n",
      "Document 19 Step 82 processed (Perplexity: 13.09).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 88 Step 82 processed (Perplexity: 12.21).\n",
      "Document 23 Step 82 processed (Perplexity: 8.42).\n",
      "Document 68 Step 82 processed (Perplexity: 14.99).\n",
      "\n",
      "\n",
      "Document 75 Step 83 processed (Perplexity: 12.72).\n",
      "Document 58 Step 83 processed (Perplexity: 4.42).\n",
      "Document 26 Step 83 processed (Perplexity: 20.57).\n",
      "Document 2 Step 83 processed (Perplexity: 19.17).\n",
      "Document 41 Step 83 processed (Perplexity: 14.29).\n",
      "Document 46 Step 83 processed (Perplexity: 19.16).\n",
      "Document 95 Step 83 processed (Perplexity: 10.51).\n",
      "Document 82 Step 83 processed (Perplexity: 11.24).\n",
      "Document 32 Step 83 processed (Perplexity: 12.48).\n",
      "Document 64 Step 83 processed (Perplexity: 11.54).\n",
      "\n",
      "\n",
      "Document 82 Step 84 processed (Perplexity: 7.99).\n",
      "Document 15 Step 84 processed (Perplexity: 12.59).\n",
      "Document 12 Step 84 processed (Perplexity: 10.97).\n",
      "Document 52 Step 84 processed (Perplexity: 7.48).\n",
      "Document 30 Step 84 processed (Perplexity: 8.68).\n",
      "Document 58 Step 84 processed (Perplexity: 4.37).\n",
      "Document 41 Step 84 processed (Perplexity: 13.68).\n",
      "Document 50 Step 84 processed (Perplexity: 12.25).\n",
      "Document 26 Step 84 processed (Perplexity: 17.75).\n",
      "Document 35 Step 84 processed (Perplexity: 13.81).\n",
      "\n",
      "\n",
      "Document 32 Step 85 processed (Perplexity: 9.88).\n",
      "Document 15 Step 85 processed (Perplexity: 10.99).\n",
      "Document 68 Step 85 processed (Perplexity: 12.58).\n",
      "Document 49 Step 85 processed (Perplexity: 11.04).\n",
      "Document 36 Step 85 processed (Perplexity: 9.15).\n",
      "Document 47 Step 85 processed (Perplexity: 8.92).\n",
      "Document 45 Step 85 processed (Perplexity: 13.69).\n",
      "Document 22 Step 85 processed (Perplexity: 12.30).\n",
      "Document 64 Step 85 processed (Perplexity: 11.06).\n",
      "Document 20 Step 85 processed (Perplexity: 8.21).\n",
      "\n",
      "\n",
      "Document 97 Step 86 processed (Perplexity: 12.73).\n",
      "Document 69 Step 86 processed (Perplexity: 11.40).\n",
      "Document 77 Step 86 processed (Perplexity: 4.68).\n",
      "Document 62 Step 86 processed (Perplexity: 8.64).\n",
      "Document 59 Step 86 processed (Perplexity: 7.19).\n",
      "Document 82 Step 86 processed (Perplexity: 11.79).\n",
      "Document 99 Step 86 processed (Perplexity: 9.67).\n",
      "Document 14 Step 86 processed (Perplexity: 18.44).\n",
      "Document 71 Step 86 processed (Perplexity: 9.21).\n",
      "Document 50 Step 86 processed (Perplexity: 11.92).\n",
      "Average loss at step 86: 2.381325\n",
      "\tPerplexity at step 86: 10.819227\n",
      "\n",
      "Valid Perplexity: 46.32\n",
      "\n",
      "Generated Text after epoch 85 ... \n",
      "======================== New text Segment ==========================\n",
      "\t  and said, like a golder the king he saw had been the king and said, \"the king, and thought to the father he said so the king her you shall the king, and the gom to the seed the king again the seak to her was to her father and in the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king again, and the king of the king aga\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 91 Step 87 processed (Perplexity: 16.22).\n",
      "Document 23 Step 87 processed (Perplexity: 8.18).\n",
      "Document 57 Step 87 processed (Perplexity: 12.32).\n",
      "Document 24 Step 87 processed (Perplexity: 10.89).\n",
      "Document 55 Step 87 processed (Perplexity: 13.55).\n",
      "Document 28 Step 87 processed (Perplexity: 8.86).\n",
      "Document 8 Step 87 processed (Perplexity: 12.47).\n",
      "Document 58 Step 87 processed (Perplexity: 4.47).\n",
      "Document 16 Step 87 processed (Perplexity: 11.27).\n",
      "Document 76 Step 87 processed (Perplexity: 12.35).\n",
      "\n",
      "\n",
      "Document 91 Step 88 processed (Perplexity: 14.49).\n",
      "Document 84 Step 88 processed (Perplexity: 11.27).\n",
      "Document 26 Step 88 processed (Perplexity: 18.28).\n",
      "Document 38 Step 88 processed (Perplexity: 10.90).\n",
      "Document 48 Step 88 processed (Perplexity: 9.91).\n",
      "Document 90 Step 88 processed (Perplexity: 12.39).\n",
      "Document 9 Step 88 processed (Perplexity: 13.23).\n",
      "Document 43 Step 88 processed (Perplexity: 11.67).\n",
      "Document 54 Step 88 processed (Perplexity: 11.25).\n",
      "Document 2 Step 88 processed (Perplexity: 19.17).\n",
      "\n",
      "\n",
      "Document 67 Step 89 processed (Perplexity: 10.53).\n",
      "Document 93 Step 89 processed (Perplexity: 8.63).\n",
      "Document 21 Step 89 processed (Perplexity: 15.16).\n",
      "Document 99 Step 89 processed (Perplexity: 6.65).\n",
      "Document 80 Step 89 processed (Perplexity: 11.11).\n",
      "Document 75 Step 89 processed (Perplexity: 12.92).\n",
      "Document 57 Step 89 processed (Perplexity: 11.36).\n",
      "Document 15 Step 89 processed (Perplexity: 12.56).\n",
      "Document 8 Step 89 processed (Perplexity: 10.63).\n",
      "Document 32 Step 89 processed (Perplexity: 11.13).\n",
      "\n",
      "\n",
      "Document 40 Step 90 processed (Perplexity: 14.64).\n",
      "Document 21 Step 90 processed (Perplexity: 13.11).\n",
      "Document 46 Step 90 processed (Perplexity: 19.25).\n",
      "Document 86 Step 90 processed (Perplexity: 9.99).\n",
      "Document 87 Step 90 processed (Perplexity: 7.69).\n",
      "Document 14 Step 90 processed (Perplexity: 15.89).\n",
      "Document 7 Step 90 processed (Perplexity: 12.76).\n",
      "Document 56 Step 90 processed (Perplexity: 8.27).\n",
      "Document 10 Step 90 processed (Perplexity: 10.72).\n",
      "Document 51 Step 90 processed (Perplexity: 7.14).\n",
      "\n",
      "\n",
      "Document 76 Step 91 processed (Perplexity: 12.22).\n",
      "Document 8 Step 91 processed (Perplexity: 10.37).\n",
      "Document 11 Step 91 processed (Perplexity: 16.28).\n",
      "Document 23 Step 91 processed (Perplexity: 7.87).\n",
      "Document 97 Step 91 processed (Perplexity: 13.80).\n",
      "Document 71 Step 91 processed (Perplexity: 8.95).\n",
      "Document 83 Step 91 processed (Perplexity: 5.46).\n",
      "Document 12 Step 91 processed (Perplexity: 12.19).\n",
      "Document 29 Step 91 processed (Perplexity: 12.17).\n",
      "Document 33 Step 91 processed (Perplexity: 13.38).\n",
      "Average loss at step 91: 2.421858\n",
      "\tPerplexity at step 91: 11.266772\n",
      "\n",
      "Valid Perplexity: 42.40\n",
      "\n",
      "Generated Text after epoch 90 ... \n",
      "======================== New text Segment ==========================\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t too so them ther there with the behand, and the father have to him, and the father again.  the childy the because him the father and did not down in the foress the face, the table withhis fast, and the tailorred the blew, and on the children the table, and said, they were once they like the table, and conceive the taken he was them out out with your father mother that it that then the father them in the father, and he was to the childed to him the becan, and strawelf, and them that the fox.  they were thumblinged coff the father them in the foresthing that if you will did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did not did\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 73 Step 92 processed (Perplexity: 14.93).\n",
      "Document 68 Step 92 processed (Perplexity: 13.32).\n",
      "Document 33 Step 92 processed (Perplexity: 8.68).\n",
      "Document 72 Step 92 processed (Perplexity: 12.09).\n",
      "Document 18 Step 92 processed (Perplexity: 9.29).\n",
      "Document 28 Step 92 processed (Perplexity: 7.83).\n",
      "Document 44 Step 92 processed (Perplexity: 11.51).\n",
      "Document 6 Step 92 processed (Perplexity: 12.88).\n",
      "Document 88 Step 92 processed (Perplexity: 12.67).\n",
      "Document 27 Step 92 processed (Perplexity: 14.08).\n",
      "\n",
      "\n",
      "Document 47 Step 93 processed (Perplexity: 8.93).\n",
      "Document 57 Step 93 processed (Perplexity: 10.97).\n",
      "Document 56 Step 93 processed (Perplexity: 7.37).\n",
      "Document 55 Step 93 processed (Perplexity: 13.81).\n",
      "Document 51 Step 93 processed (Perplexity: 5.59).\n",
      "Document 87 Step 93 processed (Perplexity: 7.48).\n",
      "Document 93 Step 93 processed (Perplexity: 9.09).\n",
      "Document 72 Step 93 processed (Perplexity: 11.91).\n",
      "Document 40 Step 93 processed (Perplexity: 14.15).\n",
      "Document 64 Step 93 processed (Perplexity: 12.83).\n",
      "\n",
      "\n",
      "Document 24 Step 94 processed (Perplexity: 11.04).\n",
      "Document 61 Step 94 processed (Perplexity: 16.73).\n",
      "Document 86 Step 94 processed (Perplexity: 9.70).\n",
      "Document 93 Step 94 processed (Perplexity: 6.96).\n",
      "Document 57 Step 94 processed (Perplexity: 10.56).\n",
      "Document 83 Step 94 processed (Perplexity: 4.61).\n",
      "Document 23 Step 94 processed (Perplexity: 8.36).\n",
      "Document 59 Step 94 processed (Perplexity: 7.13).\n",
      "Document 58 Step 94 processed (Perplexity: 4.91).\n",
      "Document 48 Step 94 processed (Perplexity: 10.90).\n",
      "\n",
      "\n",
      "Document 38 Step 95 processed (Perplexity: 11.61).\n",
      "Document 32 Step 95 processed (Perplexity: 12.24).\n",
      "Document 33 Step 95 processed (Perplexity: 12.04).\n",
      "Document 45 Step 95 processed (Perplexity: 14.07).\n",
      "Document 29 Step 95 processed (Perplexity: 12.16).\n",
      "Document 79 Step 95 processed (Perplexity: 9.57).\n",
      "Document 9 Step 95 processed (Perplexity: 14.22).\n",
      "Document 78 Step 95 processed (Perplexity: 13.74).\n",
      "Document 31 Step 95 processed (Perplexity: 6.62).\n",
      "Document 35 Step 95 processed (Perplexity: 17.02).\n",
      "\n",
      "\n",
      "Document 5 Step 96 processed (Perplexity: 14.58).\n",
      "Document 76 Step 96 processed (Perplexity: 11.51).\n",
      "Document 79 Step 96 processed (Perplexity: 5.92).\n",
      "Document 40 Step 96 processed (Perplexity: 13.71).\n",
      "Document 32 Step 96 processed (Perplexity: 10.04).\n",
      "Document 58 Step 96 processed (Perplexity: 4.23).\n",
      "Document 17 Step 96 processed (Perplexity: 12.85).\n",
      "Document 81 Step 96 processed (Perplexity: 22.07).\n",
      "Document 50 Step 96 processed (Perplexity: 13.12).\n",
      "Document 77 Step 96 processed (Perplexity: 5.17).\n",
      "Average loss at step 96: 2.332110\n",
      "\tPerplexity at step 96: 10.299655\n",
      "\n",
      "Valid Perplexity: 69.31\n",
      "\n",
      "Generated Text after epoch 95 ... \n",
      "======================== New text Segment ==========================\n",
      "\t rew he whoso the stood but the should stood to stoem\n",
      "look to stop her to whon he whole had\n",
      "stop and itched his poor standfathered the pop it.  and who was of the pop it.  and wholed it.  and who was of the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon the pot to stop strey togethere to stop stat, and said, and then stop, and whoweress with the pot, and then the pop one stard, and then the pop to cook.  the pont was wook.  the pot, when the pot, when the pop it.  and the king still stopped the pon th\n",
      "====================================================================\n",
      "\n",
      "\n",
      "\n",
      "Document 69 Step 97 processed (Perplexity: 12.72).\n",
      "Document 62 Step 97 processed (Perplexity: 8.79).\n",
      "Document 84 Step 97 processed (Perplexity: 12.44).\n",
      "Document 66 Step 97 processed (Perplexity: 15.90).\n",
      "Document 97 Step 97 processed (Perplexity: 12.08).\n",
      "Document 58 Step 97 processed (Perplexity: 3.58).\n",
      "Document 92 Step 97 processed (Perplexity: 9.89).\n",
      "Document 19 Step 97 processed (Perplexity: 11.92).\n",
      "Document 31 Step 97 processed (Perplexity: 6.09).\n",
      "Document 45 Step 97 processed (Perplexity: 14.53).\n",
      "\n",
      "\n",
      "Document 37 Step 98 processed (Perplexity: 13.22).\n",
      "Document 0 Step 98 processed (Perplexity: 11.43).\n",
      "Document 86 Step 98 processed (Perplexity: 9.45).\n",
      "Document 93 Step 98 processed (Perplexity: 8.12).\n",
      "Document 16 Step 98 processed (Perplexity: 11.18).\n",
      "Document 50 Step 98 processed (Perplexity: 11.49).\n",
      "Document 25 Step 98 processed (Perplexity: 10.75).\n",
      "Document 4 Step 98 processed (Perplexity: 17.28).\n",
      "Document 31 Step 98 processed (Perplexity: 4.87).\n",
      "Document 67 Step 98 processed (Perplexity: 11.30).\n",
      "\n",
      "\n",
      "Document 49 Step 99 processed (Perplexity: 11.65).\n",
      "Document 54 Step 99 processed (Perplexity: 11.49).\n",
      "Document 67 Step 99 processed (Perplexity: 7.85).\n",
      "Document 68 Step 99 processed (Perplexity: 13.25).\n",
      "Document 28 Step 99 processed (Perplexity: 7.92).\n",
      "Document 25 Step 99 processed (Perplexity: 8.86).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 46 Step 99 processed (Perplexity: 20.43).\n",
      "Document 39 Step 99 processed (Perplexity: 8.21).\n",
      "Document 55 Step 99 processed (Perplexity: 13.14).\n",
      "Document 87 Step 99 processed (Perplexity: 7.19).\n",
      "\n",
      "\n",
      "Document 75 Step 100 processed (Perplexity: 14.18).\n",
      "Document 33 Step 100 processed (Perplexity: 12.19).\n",
      "Document 77 Step 100 processed (Perplexity: 4.62).\n",
      "Document 35 Step 100 processed (Perplexity: 15.43).\n",
      "Document 39 Step 100 processed (Perplexity: 6.73).\n",
      "Document 30 Step 100 processed (Perplexity: 8.96).\n",
      "Document 99 Step 100 processed (Perplexity: 10.21).\n",
      "Document 52 Step 100 processed (Perplexity: 9.61).\n",
      "Document 0 Step 100 processed (Perplexity: 11.41).\n",
      "Document 34 Step 100 processed (Perplexity: 13.22).\n"
     ]
    }
   ],
   "source": [
    "num_steps = 50 # Number of steps we run the algorithm for\n",
    "# How many training steps are performed for each document in a single step\n",
    "steps_per_document = 100 \n",
    "\n",
    "# How often we run validation\n",
    "valid_summary = 5\n",
    "\n",
    "# In the book we run tests with this set to both 20 and 100\n",
    "train_doc_count = 100\n",
    "train_docs_to_use =10 # Number of docs we use in a single step\n",
    "\n",
    "# Store the training and validation perplexity at each step\n",
    "valid_perpelxity_ot = []\n",
    "train_perplexity_ot = []\n",
    "\n",
    "session = tf.InteractiveSession()\n",
    "# Initializing variables\n",
    "tf.global_variables_initializer().run()\n",
    "\n",
    "print('Initialized')\n",
    "average_loss = 0\n",
    "\n",
    "# We use the first 10 documents that has \n",
    "# more than (num_steps+1)*steps_per_document bigrams for creating the validation dataset\n",
    "\n",
    "# Identify the first 10 documents following the above condition\n",
    "long_doc_ids = []\n",
    "for di in range(num_files):\n",
    "  if len(data_list[di])>(num_steps+1)*steps_per_document:\n",
    "    long_doc_ids.append(di)\n",
    "  if len(long_doc_ids)==10:\n",
    "    break\n",
    "\n",
    "# Generating validation data\n",
    "data_gens = []\n",
    "valid_data = []\n",
    "for fi in range(num_files):\n",
    "  # Get all the bigrams if the document id is not in the validation document ids\n",
    "  if fi not in long_doc_ids:\n",
    "    data_gens.append(DataGeneratorOHE(data_list[fi],batch_size,num_unroll))\n",
    "  # if the document is in the validation doc ids, only get up to the \n",
    "  # last steps_per_document bigrams and use the last steps_per_document bigrams as validation data\n",
    "  else:\n",
    "    data_gens.append(DataGeneratorOHE(data_list[fi][:len(data_list[fi])-steps_per_document],batch_size,num_unroll))\n",
    "    valid_data.extend(data_list[fi][-steps_per_document:])\n",
    "\n",
    "# Defining the validation data generator\n",
    "valid_gen = DataGeneratorOHE(valid_data,1,1)\n",
    "\n",
    "feed_dict = {}\n",
    "for step in range(num_steps):\n",
    "    print('\\n')\n",
    "    for di in np.random.permutation(train_doc_count)[:train_docs_to_use]:                    \n",
    "        doc_perplexity = 0\n",
    "        for doc_step_id in range(steps_per_document):\n",
    "            \n",
    "            # Get a set of unrolled batches\n",
    "            u_data, u_labels = data_gens[di].unroll_batches()\n",
    "            \n",
    "            # Populate the feed dict by using each of the data batches\n",
    "            # present in the unrolled data\n",
    "            for ui,(dat,lbl) in enumerate(zip(u_data,u_labels)):            \n",
    "                feed_dict[train_dataset[ui]]=dat\n",
    "                feed_dict[train_labels[ui]] = lbl            \n",
    "            \n",
    "            # Running the TensorFlow operation\n",
    "            _, l, step_predictions, _, step_labels, step_perplexity = \\\n",
    "            session.run([rnn_optimizer, rnn_loss, y_predictions,\n",
    "                         train_dataset,train_labels,train_perplexity_without_exp], \n",
    "                        feed_dict=feed_dict)\n",
    "            \n",
    "            # Update doc perplexity variable\n",
    "            doc_perplexity += step_perplexity\n",
    "            # Update average step perplexity \n",
    "            average_loss += step_perplexity\n",
    "                \n",
    "        print('Document %d Step %d processed (Perplexity: %.2f).'\n",
    "              %(di,step+1,np.exp(doc_perplexity/steps_per_document))\n",
    "             )\n",
    "        \n",
    "    # resetting hidden state after processing a single document\n",
    "    session.run(training_reset_ops) \n",
    "    \n",
    "    # Validation phase\n",
    "    if step % valid_summary == 0:\n",
    "      \n",
    "      # Compute average loss\n",
    "      average_loss = average_loss / (train_docs_to_use*steps_per_document*valid_summary)\n",
    "      \n",
    "      print('Average loss at step %d: %f' % (step+1, average_loss))\n",
    "      print('\\tPerplexity at step %d: %f' %(step+1, np.exp(average_loss)))\n",
    "      train_perplexity_ot.append(np.exp(average_loss))\n",
    "      \n",
    "      average_loss = 0 # reset loss\n",
    "      \n",
    "      valid_loss = 0 # reset loss\n",
    "      \n",
    "      # calculate valid perplexity\n",
    "      for v_step in range(steps_per_document*10):\n",
    "        uvalid_data,uvalid_labels = valid_gen.unroll_batches()        \n",
    "        \n",
    "        # Run validation phase related TensorFlow operations\n",
    "        v_loss,v_preds,v_labels,v_preplexity = session.run(\n",
    "            [rnn_valid_loss,valid_predictions,valid_labels, valid_perplexity_without_exp],\n",
    "            feed_dict = {valid_dataset:uvalid_data[0],valid_labels: uvalid_labels[0]}\n",
    "        )\n",
    "        \n",
    "        # Update validation perplexity\n",
    "        valid_loss += v_preplexity        \n",
    "      \n",
    "      # Reset validation data generator cursor\n",
    "      valid_gen.reset_indices()  \n",
    "    \n",
    "      print()\n",
    "      print(\"Valid Perplexity: %.2f\\n\"%np.exp(valid_loss/(steps_per_document*10)))\n",
    "      valid_perpelxity_ot.append(np.exp(valid_loss/(steps_per_document*10)))\n",
    "      session.run(valid_reset_ops)\n",
    "        \n",
    "      # Generating new text ...\n",
    "      # We will be generating one segment having 1000 bigrams\n",
    "      # Feel free to generate several segments by changing\n",
    "      # the value of segments_to_generate\n",
    "      print('Generated Text after epoch %d ... '%step)  \n",
    "      segments_to_generate = 1\n",
    "      chars_in_segment = 1000\n",
    "    \n",
    "      for _ in range(segments_to_generate):\n",
    "        print('======================== New text Segment ==========================')\n",
    "        # Start with a random word\n",
    "        test_word = np.zeros((1,input_sizes[0]),dtype=np.float32)\n",
    "        test_word[0,data_list[np.random.randint(0,num_files)][np.random.randint(0,500)]] = 1.0\n",
    "        print(\"\\t\",reverse_dictionary[np.argmax(test_word[0])],end='')\n",
    "        \n",
    "        # Generating words within a segment by feeding in the previous prediction\n",
    "        # as the current input in a recursive manner\n",
    "        for _ in range(chars_in_segment):    \n",
    "          test_pred = session.run(test_predictions, feed_dict = {test_dataset:test_word})  \n",
    "          next_ind = sample(test_pred.ravel())\n",
    "          test_word = np.zeros((1,input_sizes[0]),dtype=np.float32)\n",
    "          test_word[0,next_ind] = 1.0\n",
    "          print(reverse_dictionary[next_ind],end='')\n",
    "        \n",
    "        print(\"\")\n",
    "        # Reset test state\n",
    "        session.run(test_reset_ops)\n",
    "        print('====================================================================')\n",
    "      print(\"\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
